Skip to content

Commit d09b945

Browse files
committed
example
1 parent 06dd647 commit d09b945

1 file changed

Lines changed: 87 additions & 0 deletions

File tree

column_name_test.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import os.path
2+
3+
from pyiceberg.catalog.sql import SqlCatalog
4+
import pyarrow as pa
5+
import pandas as pd
6+
7+
8+
def sanitize_ch_names(ch_name: str) -> str:
9+
""" Helper func to sanitize the column/channel names """
10+
chars_to_replace = [":", ".", "-", "/"]
11+
sanitized = ch_name
12+
for char in chars_to_replace:
13+
sanitized = sanitized.replace(char, "_")
14+
sanitized = sanitized.lower()
15+
return sanitized
16+
17+
18+
"""
19+
Simple logic to create dataframe and save it to iceberg table.
20+
Showcases issues with column name in pyarrow unless sanitized
21+
"""
22+
23+
# Verify warehouse folder exists
24+
if not os.path.exists("warehouse"):
25+
os.mkdir("warehouse")
26+
27+
data = {
28+
'TEST:A1B2.RAW.ABC-GG-1-A': [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
29+
'TEST:A1B2.RAW.ABC-GG-1-B': [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9],
30+
'TEST:A1B2.RAW.ABC-GG-1-C': [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9],
31+
'time': [
32+
1702090722998897808,
33+
1702090722998947809,
34+
1702090722998997809,
35+
1702090722999047809,
36+
1702090722999097809,
37+
1702090722999147809,
38+
1702090722999197809,
39+
1702090722999247809,
40+
1702090722999297809,
41+
1702090722999347809
42+
]
43+
}
44+
45+
df = pd.DataFrame(data)
46+
pa_data = pa.Table.from_pandas(df)
47+
48+
"""
49+
Uncomment to sanitize the channel names and make it work.
50+
Delete the contents in warehouse folder and rerun.
51+
"""
52+
# ch_name_swap = dict()
53+
# for ch_name in pa_data.column_names:
54+
# ch_name_swap[ch_name] = sanitize_ch_names(ch_name)
55+
# pa_data = pa_data.rename_columns(ch_name_swap.values())
56+
57+
# iceberg warehouse local sqlite
58+
warehouse_path = "/tmp/warehouse"
59+
catalog = SqlCatalog(
60+
"default",
61+
**{
62+
"uri": f"sqlite:///{warehouse_path}/pyiceberg-catalog.db",
63+
"warehouse": f"file://{warehouse_path}"
64+
}
65+
)
66+
67+
try:
68+
catalog.create_namespace("A1B2")
69+
except:
70+
pass
71+
72+
try:
73+
catalog.create_table("A1B2.A1-301", schema=pa_data.schema)
74+
except:
75+
pass
76+
77+
table = catalog.load_table("A1B2.A1-301")
78+
table.overwrite(pa_data)
79+
80+
# This causes the error pyarrow.lib.ArrowInvalid:
81+
# No match for FieldRef.Name(A1B2_x2ERAW_x2EABC_x2DGG_x2D1_x2DA) in A1B2.RAW.ABC-GG-1-A: double
82+
df_pandas = table.scan().to_pandas()
83+
df_pyarrow = table.scan().to_arrow()
84+
85+
print(df_pandas)
86+
print(df_pyarrow)
87+

0 commit comments

Comments
 (0)