-
Notifications
You must be signed in to change notification settings - Fork 663
Expand file tree
/
Copy pathexcel.py
More file actions
62 lines (56 loc) · 1.75 KB
/
excel.py
File metadata and controls
62 lines (56 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import pandas as pd
# Maps the string form of a pandas/NumPy dtype to the coarse field-type
# label reported in previews.
FIELD_TYPE_MAP = {
    # Integer dtypes.
    'int64': 'int',
    'int32': 'int',
    # Floating-point dtypes.
    'float64': 'float',
    'float32': 'float',
    # Datetime dtypes.
    'datetime64': 'datetime',
    'datetime64[ns]': 'datetime',
    # Text-like dtypes; note that booleans are reported as strings.
    'object': 'string',
    'string': 'string',
    'bool': 'string',
}

# Maps a field-type label back to the pandas dtype name associated with it.
USER_TYPE_TO_PANDAS = {
    'int': 'int64',
    'float': 'float64',
    'datetime': 'datetime64[ns]',
    'string': 'string',
}
def infer_field_type(dtype) -> str:
    """Return the preview field-type label for a column dtype.

    Args:
        dtype: A pandas/NumPy dtype (or any object whose ``str()`` is a
            dtype name).

    Returns:
        The label registered in ``FIELD_TYPE_MAP`` for ``str(dtype)``;
        ``'datetime'`` for any other ``datetime64`` variant; ``'string'``
        for everything else.
    """
    dtype_name = str(dtype)
    label = FIELD_TYPE_MAP.get(dtype_name)
    if label is not None:
        return label
    # The exact-name table only lists 'datetime64' and 'datetime64[ns]'.
    # Datetime columns may also carry another resolution or a timezone
    # (e.g. 'datetime64[us]', 'datetime64[ns, UTC]'); without this check
    # they would be mislabelled as strings.
    if dtype_name.startswith('datetime64'):
        return 'datetime'
    return 'string'
def _build_sheet_preview(df, sheet_name, max_rows):
    """Summarise one DataFrame as a sheet-preview dict."""
    fields = [
        {"fieldName": col, "fieldType": infer_field_type(df[col].dtype)}
        for col in df.columns
    ]
    return {
        "sheetName": sheet_name,
        "fields": fields,
        "data": df.head(max_rows).to_dict(orient='records'),
        "rows": len(df),
    }


def parse_excel_preview(save_path: str, max_rows: int = 10):
    """Build a per-sheet preview of a CSV file or an Excel workbook.

    Args:
        save_path: Path of the file to read. A path ending in ``.csv``
            (any letter case) is read as CSV; anything else is read as an
            Excel workbook with the ``calamine`` engine.
        max_rows: Maximum number of leading rows included in each sheet's
            ``"data"`` list.

    Returns:
        A list with one dict per sheet, each holding ``"sheetName"``,
        ``"fields"`` (a list of ``{"fieldName", "fieldType"}`` dicts),
        ``"data"`` (the first ``max_rows`` rows as records) and ``"rows"``
        (the total row count). A CSV file yields a single entry named
        ``"Sheet1"``.
    """
    # Compare the extension case-insensitively so "DATA.CSV" is not sent
    # to the Excel reader.
    if save_path.lower().endswith(".csv"):
        df = pd.read_csv(save_path, engine='c')
        return [_build_sheet_preview(df, "Sheet1", max_rows)]

    # Open the workbook once, with the same engine used for parsing, and
    # close it deterministically instead of re-opening the file per sheet.
    with pd.ExcelFile(save_path, engine='calamine') as workbook:
        return [
            _build_sheet_preview(
                workbook.parse(sheet_name), sheet_name, max_rows
            )
            for sheet_name in workbook.sheet_names
        ]