Skip to content

Commit aeff74e

Browse files
author
kx79wq
committed
fix: fix spark4 unit tests
1 parent 86c5ade commit aeff74e

3 files changed

Lines changed: 23 additions & 23 deletions

File tree

histogrammar/dfinterface/spark_histogrammar.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,7 @@ def construct_empty_hist(self, df, features):
225225
for idx, col in enumerate(revcols):
226226
# histogram type depends on the data type
227227
dt = self.var_dtype[col]
228-
quant = df[col]
228+
quant = f.col(col)
229229
hist = self.get_hist_bin(hist, features, quant, col, dt)
230230

231231
return hist

histogrammar/util.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -247,7 +247,7 @@ def __init__(self, expr, name=None):
247247
ok = False
248248
else:
249249
if isinstance(expr, Column) and self.name is None:
250-
self.name = str(expr)[7:-1]
250+
self.name = str(expr)[8:-2]
251251
ok = True
252252
if not ok:
253253
raise TypeError(f"quantity ({expr}) must be a string, function, or SparkSQL Column")

tests/test_spark_histogrammar.py

Lines changed: 21 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -53,16 +53,16 @@ def spark_co():
5353
@pytest.mark.skipif(not spark_found, reason="spark not found")
5454
@pytest.mark.filterwarnings("ignore:createDataFrame attempted Arrow optimization because")
5555
def test_get_histograms(spark_co):
56-
pytest.age["data"]["name"] = "'age'"
57-
pytest.company["data"]["name"] = "'company'"
58-
pytest.eyesColor["data"]["name"] = "'eyeColor'"
59-
pytest.gender["data"]["name"] = "'gender'"
60-
pytest.isActive["data"]["name"] = "'isActive'"
61-
pytest.latitude["data"]["name"] = "'latitude'"
62-
pytest.longitude["data"]["name"] = "'longitude'"
63-
pytest.transaction["data"]["name"] = "'transaction'"
64-
65-
pytest.latitude_longitude["data"]["name"] = "'latitude:longitude'"
56+
pytest.age["data"]["name"] = "age"
57+
pytest.company["data"]["name"] = "company"
58+
pytest.eyesColor["data"]["name"] = "eyeColor"
59+
pytest.gender["data"]["name"] = "gender"
60+
pytest.isActive["data"]["name"] = "isActive"
61+
pytest.latitude["data"]["name"] = "latitude"
62+
pytest.longitude["data"]["name"] = "longitude"
63+
pytest.transaction["data"]["name"] = "transaction"
64+
65+
pytest.latitude_longitude["data"]["name"] = "latitude:longitude"
6666
pytest.latitude_longitude["data"]["bins:name"] = "unit_func"
6767

6868
spark = spark_co
@@ -113,15 +113,15 @@ def test_get_histograms(spark_co):
113113
@pytest.mark.skipif(not spark_found, reason="spark not found")
114114
@pytest.mark.filterwarnings("ignore:createDataFrame attempted Arrow optimization because")
115115
def test_get_histograms_module(spark_co):
116-
pytest.age["data"]["name"] = "'age'"
117-
pytest.company["data"]["name"] = "'company'"
118-
pytest.eyesColor["data"]["name"] = "'eyeColor'"
119-
pytest.gender["data"]["name"] = "'gender'"
120-
pytest.isActive["data"]["name"] = "'isActive'"
121-
pytest.latitude["data"]["name"] = "'latitude'"
122-
pytest.longitude["data"]["name"] = "'longitude'"
123-
124-
pytest.latitude_longitude["data"]["name"] = "'latitude:longitude'"
116+
pytest.age["data"]["name"] = "age"
117+
pytest.company["data"]["name"] = "company"
118+
pytest.eyesColor["data"]["name"] = "eyeColor"
119+
pytest.gender["data"]["name"] = "gender"
120+
pytest.isActive["data"]["name"] = "isActive"
121+
pytest.latitude["data"]["name"] = "latitude"
122+
pytest.longitude["data"]["name"] = "longitude"
123+
124+
pytest.latitude_longitude["data"]["name"] = "latitude:longitude"
125125
pytest.latitude_longitude["data"]["bins:name"] = "unit_func"
126126

127127
spark = spark_co
@@ -196,7 +196,7 @@ def test_get_histograms_timestamp(spark_co):
196196
"bins": {"108": 9.0, "109": 1.0},
197197
"bins:type": "Count",
198198
"entries": 10.0,
199-
"name": "'dt'",
199+
"name": "dt",
200200
"nanflow": 0.0,
201201
"nanflow:type": "Count",
202202
"origin": 1.2625632e18,
@@ -238,7 +238,7 @@ def test_get_histograms_date(spark_co):
238238
"bins": {"108": 9.0, "109": 1.0},
239239
"bins:type": "Count",
240240
"entries": 10.0,
241-
"name": "'dt'",
241+
"name": "dt",
242242
"nanflow": 0.0,
243243
"nanflow:type": "Count",
244244
"origin": 1.2625632e18,

0 commit comments

Comments
 (0)