Skip to content

Commit 26415e4

Browse files
authored
Handle decimals and date types (#53)
* handle decimals * lint * Cast to timestamp
1 parent 7bfd7bb commit 26415e4

4 files changed

Lines changed: 82 additions & 2 deletions

File tree

src/panel_gwalker/_gwalker.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
SPECTYPES,
3636
SpecType,
3737
_raw_fields,
38+
cast_to_supported_dtypes,
3839
configure_debug_log_level,
3940
logger,
4041
process_spec,
@@ -432,6 +433,10 @@ def _compute(self, payload):
432433
result = pd.DataFrame()
433434

434435
df = pd.DataFrame.from_records(result)
436+
437+
# Convert Decimal objects to float and date/datetime objects to timestamps
438+
df = cast_to_supported_dtypes(df)
439+
435440
logger.debug("response:\n%s", df)
436441
return {col: df[col].values for col in df.columns}
437442

src/panel_gwalker/_tabular_data.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from narwhals.typing import FrameT, IntoFrame
1616
from panel.io.datamodel import PARAM_MAPPING
1717

18+
from ._utils import cast_to_supported_dtypes
19+
1820
TabularDataType = IntoFrame
1921

2022

@@ -45,7 +47,7 @@ def _validate(self, val):
4547
def _column_datasource_from_tabular_df(data: FrameT):
4648
if isinstance(data, nw.LazyFrame):
4749
data = data.collect()
48-
data = data.to_pandas()
50+
data = cast_to_supported_dtypes(data.to_pandas())
4951
return ColumnDataSource._data_from_df(data)
5052

5153

src/panel_gwalker/_utils.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
import datetime
2+
import decimal
13
import json
24
import logging
35
import os
46
import sys
57
from pathlib import Path
8+
from typing import Any, Union
69

710
import narwhals as nw
811
import pandas as pd
@@ -11,6 +14,35 @@
1114
from narwhals.dataframe import LazyFrame
1215
from narwhals.typing import FrameT
1316

17+
18+
def cast_to_supported_dtypes(df: pd.DataFrame, sample: int = 100) -> pd.DataFrame:
    """
    Cast object columns holding Decimal or date values to native dtypes.

    Bokeh ColumnDataSource does not support decimal.Decimal, so object
    columns in which a decimal.Decimal is found are converted with
    ``pd.to_numeric``. Object columns in which a datetime.date or
    datetime.datetime is found are converted with ``pd.to_datetime``.
    Values that cannot be converted become NaN / NaT (``errors="coerce"``).
    If the conversion itself raises, the column is cast to ``str`` instead.

    Only a subset of each column is inspected. The subset is drawn from the
    non-null values with a fixed seed, so repeated calls on the same data
    always reach the same decision.

    Arguments
    ---------
    df (pd.DataFrame):
        the DataFrame to convert; it is copied, never modified in place
    sample (int):
        number of values per object column to inspect

    Returns
    -------
    pd.DataFrame:
        a copy of ``df`` with the detected columns converted
    """
    df = df.copy()
    for col in df.select_dtypes(include=["object"]).columns:
        column = df[col]
        # Nulls carry no type information; probing only real values keeps
        # sparsely populated columns detectable with a small sample.
        candidates = column.dropna()
        if candidates.empty:
            candidates = column
        # Fixed seed: callers convert the same data more than once and must
        # get the same dtypes every time.
        probe = candidates.sample(min(sample, len(candidates)), random_state=0)
        try:
            if any(isinstance(value, decimal.Decimal) for value in probe):
                df[col] = pd.to_numeric(column, errors="coerce")
            # elif: a column already cast to numeric must not additionally be
            # reinterpreted as timestamps.
            elif any(
                isinstance(value, (datetime.datetime, datetime.date))
                for value in probe
            ):
                df[col] = pd.to_datetime(column, errors="coerce")
        except Exception:
            # Best-effort fallback: keep the column usable as strings, but
            # leave a trace of why the cast was abandoned.
            logging.getLogger("panel-graphic-walker").debug(
                "Could not cast column %r; falling back to str", col, exc_info=True
            )
            df[col] = column.astype(str)
    return df
44+
45+
1446
logger = logging.getLogger("panel-graphic-walker")
1547
FORMAT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
1648
from narwhals.typing import FrameT
@@ -90,7 +122,7 @@ def _raw_fields(data: FrameT) -> list[dict]:
90122
except Exception as ex:
91123
pass
92124

93-
pandas_data = data.to_pandas()
125+
pandas_data = cast_to_supported_dtypes(data.to_pandas())
94126
return _raw_fields_core(pandas_data)
95127

96128

tests/test_tabular_data.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1+
import datetime
2+
import decimal
3+
4+
import numpy as np
5+
import pandas as pd
16
import param
27
import pytest
38

49
from panel_gwalker._tabular_data import TabularData, _column_datasource_from_tabular_df
10+
from panel_gwalker._utils import cast_to_supported_dtypes
511

612

713
class MyClass(param.Parameterized):
@@ -20,3 +26,38 @@ def test_tabular_data_raises():
2026

2127
def test_column_datasource_from_tabular_df(data):
2228
assert _column_datasource_from_tabular_df(data)
29+
30+
31+
def test_decimal_conversion():
    """Decimal values in an object column come back as plain floats."""
    prices = [decimal.Decimal("10.50"), decimal.Decimal("25.75")]
    frame = pd.DataFrame(
        {
            "price": prices,
            "qty": [5, 10],
            "name": ["Item A", "Item B"],
        }
    )

    result = cast_to_supported_dtypes(frame)
    first_price = result["price"][0]

    # The first price must be a real float, no longer a Decimal.
    assert isinstance(first_price, float)
    assert not isinstance(first_price, decimal.Decimal)
    assert first_price == 10.5
45+
46+
47+
def test_date_conversion():
    """Object columns of ``datetime.date`` values are cast to timestamps."""
    df = pd.DataFrame(
        {
            # Keep the raw datetime.date objects: wrapping them in
            # pd.to_datetime here would make the column datetime64 before the
            # function under test ever sees it, so nothing would be exercised.
            "date": [
                datetime.date(2020, 1, 1) + datetime.timedelta(days=i)
                for i in range(3)
            ],
            "value": np.random.randn(3).cumsum(),
        }
    )
    # Precondition: the cast under test only looks at object columns.
    assert df["date"].dtype == object

    converted_df = cast_to_supported_dtypes(df)

    assert isinstance(converted_df["date"][0], pd.Timestamp)
    assert converted_df["date"][0] == pd.Timestamp("2020-01-01")

0 commit comments

Comments
 (0)