Skip to content

Commit 9c2782a

Browse files
authored
Merge pull request duckdb#752 from redraiment/main
Updates for the rusty_sheet extension v0.2.0
2 parents b8d3889 + 4ff8e5c commit 9c2782a

2 files changed

Lines changed: 30 additions & 18 deletions

File tree

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
extension:
22
name: rusty_sheet
3-
description: An Excel/OpenDocument Spreadsheets file reader for DuckDB
4-
version: 0.1.0
3+
description: An Excel/WPS/OpenDocument Spreadsheets file reader for DuckDB
4+
version: 0.2.0
55
language: Rust
66
build: cargo
77
license: MIT
@@ -12,33 +12,43 @@ extension:
1212

1313
repo:
1414
github: redraiment/rusty-sheet
15-
ref: 3cc700798071238763b9dc84ed0d16b42ab1e57d
15+
ref: b864536c57efdc7da06b63126fe7cbd349000ee7
1616

1717
docs:
1818
hello_world: |
19-
-- Read entire spreadsheet with headers
19+
-- Read first sheet of spreadsheet with headers
2020
FROM read_sheet('data.xlsx');
2121
22+
-- Read without headers
23+
FROM read_sheet('data.xlsx', header=false);
24+
2225
-- Read specific worksheet
23-
FROM read_sheet('workbook.xlsx', sheet_name='Sheet2');
26+
FROM read_sheet('workbook.xlsx', sheet='Sheet2');
27+
28+
-- Analyze more rows (default 10) to detect column types
29+
FROM analyze_sheet('data.xlsx', analyze_rows=20);
2430
2531
-- Override specific column types (others auto-detected)
26-
FROM read_sheet('data.xlsx',
27-
columns={'id': 'bigint'}
28-
);
32+
FROM read_sheet('data.xlsx', columns={'id': 'bigint'});
2933
3034
-- Read specific data range (Excel-style notation)
3135
FROM read_sheet('data.xlsx', range='A2:E100');
3236
33-
-- Read without headers
34-
FROM read_sheet('data.xlsx',
35-
header=false,
36-
columns={'column1': 'varchar', 'column2': 'bigint'}
37-
);
37+
-- Read all worksheets in multiple file types with different extensions
38+
FROM read_sheets(['*.xlsx', '*.ods', '*.et']);
3839
39-
-- Analyze column types
40-
FROM analyze_sheet('data.xlsx', analyze_rows=20);
40+
-- Analyze with wildcard pattern
41+
FROM analyze_sheets(['*.xlsx'], sheets=['Sheet*']);
42+
43+
-- Match specific worksheets only in specific file types
44+
FROM read_sheets(['*.xlsx'], sheets=['*.xlsx=Sheet*']);
45+
46+
-- Track data sources with custom column names
47+
FROM read_sheets(['*.xlsx'], file_name_column='file', sheet_name_column='sheet');
48+
49+
-- Union data by column name instead of position
50+
FROM read_sheets(['*.xlsx'], union_by_name=true);
4151
4252
extended_description: |
43-
The DuckDB rusty-sheet extension that enables reading Excel and OpenDocument spreadsheet files directly within SQL queries. This extension provides seamless integration for analyzing spreadsheet data using DuckDB's powerful SQL engine.
53+
The DuckDB rusty-sheet extension enables reading Excel, WPS and OpenDocument spreadsheet files directly within SQL queries. This extension provides seamless integration for analyzing spreadsheet data using DuckDB's powerful SQL engine.
4454
For detailed setup and usage instructions, visit the docs at [rusty-sheet](https://github.com/redraiment/rusty-sheet).
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
function,description,comment,example
2-
read_sheet,"Read spreadsheet data and make it available for SQL queries in DuckDB.",,"FROM read_sheet('data.xlsx');"
3-
analyze_sheet,"Analyze spreadsheet files and return column type information.",,"FROM analyze_sheet('data.xlsx', analyze_rows=20);"
2+
analyze_sheet,"Analyzes a single spreadsheet sheet to detect column names and data types.",,"FROM analyze_sheet('data.xlsx', analyze_rows=20);"
3+
analyze_sheets,"Analyzes multiple spreadsheet sheets across files to detect column structures.",,"FROM analyze_sheets(['*.xls']);"
4+
read_sheet,"Reads data from a single spreadsheet sheet into a DuckDB table.",,"FROM read_sheet('data.ods');"
5+
read_sheets,"Reads data from multiple spreadsheet sheets across files into DuckDB tables.",,"FROM read_sheets(['*.et'], union_by_name=true);"

0 commit comments

Comments
 (0)