Skip to content

Commit 2a6bdf5

Browse files
heidi-holm-4ssCopilotbjorn-einar-bjartnes-4ssbranislavjenco
authored
Bug in result collector (#60)
* add failling test * add another failing test * typo in last test * update push and pull to use consistent encoding * Update tests/test_core.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * rename test * Add tox to dev dependencies Says in the readme it should work, but does not due to missing dep * Remove commented line * How to install deps and run tests without tox * Avoid rebuilding with pip commands * Try to run tests on CI with only encoding set * Add return back, not correct fix * Need to fix the truncation logic mixing bytes and chars * Flushing and getting current pos to truncate * Black formatting --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Bjørn Einar Bjartnes <88324093+bjorn-einar-bjartnes-4ss@users.noreply.github.com> Co-authored-by: Branislav Jenco <branislavjenco@gmail.com>
1 parent cec237c commit 2a6bdf5

6 files changed

Lines changed: 96 additions & 8 deletions

File tree

DEVELOPERS.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,16 @@ python -m build --outdir ./build
2929
```
3030
sphinx-build -W -b html ./docs ./build/docs
3131
```
32-
32+
## Install deps for running tests
33+
```
34+
pip install -e .
35+
```
3336
## Run tests
3437

3538
```
3639
tox
3740
```
41+
or
42+
```
43+
pytest
44+
```

dev-requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ pydata-sphinx-theme == 0.11.0
66
pytest
77
pytest-cov
88
sphinx == 5.3.0
9-
myst-parser<2.0
9+
myst-parser<2.0
10+
tox

fourinsight/engineroom/utils/_core.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,17 @@ def pull(self, raise_on_missing=True):
4545
current_pos = self.tell()
4646
self.seek(0)
4747
try:
48-
characters_written = self._pull()
48+
self._pull()
4949
except self._SOURCE_NOT_FOUND_ERROR as e:
5050
if raise_on_missing:
5151
self.seek(current_pos)
5252
raise e
5353
else:
5454
self.truncate(0)
5555
else:
56-
self.truncate(characters_written)
56+
self.flush()
57+
pos = self.tell()
58+
self.truncate(pos)
5759

5860
def push(self):
5961
"""
@@ -143,11 +145,11 @@ def __repr__(self):
143145
return f"LocalFileHandler {self._path.resolve()}"
144146

145147
def _pull(self):
146-
return self.write(open(self._path, mode="r").read())
148+
return self.write(open(self._path, mode="r", encoding=self.encoding).read())
147149

148150
def _push(self):
149151
self._path.parent.mkdir(parents=True, exist_ok=True)
150-
with open(self._path, mode="w") as f:
152+
with open(self._path, mode="w", encoding=self.encoding) as f:
151153
f.write(self.getvalue())
152154

153155

@@ -343,6 +345,7 @@ def __init__(self, headers, handler=None, indexing_mode="auto"):
343345
raise ValueError("Indexing mode must be 'auto' or 'timestamp'.")
344346

345347
self._dataframe = pd.DataFrame(columns=headers.keys()).astype(self._headers)
348+
self.encoding = getattr(self._handler, "encoding", "utf-8")
346349

347350
def __repr__(self):
348351
return repr(self._dataframe)
@@ -470,6 +473,7 @@ def pull(self, raise_on_missing=True, strict=True):
470473
parse_dates=True,
471474
dtype=self._headers,
472475
date_format="ISO8601",
476+
encoding=self.encoding,
473477
)
474478

475479
if strict and set(df_source.columns) != set(self._headers.keys()):
@@ -501,11 +505,19 @@ def push(self):
501505
self._handler.truncate()
502506
try:
503507
self._dataframe.to_csv(
504-
self._handler, sep=",", index=True, lineterminator="\n"
508+
self._handler,
509+
sep=",",
510+
index=True,
511+
lineterminator="\n",
512+
encoding=self.encoding,
505513
)
506514
except TypeError: # for backward compatibility (remove after 2024-06-01)
507515
self._dataframe.to_csv(
508-
self._handler, sep=",", index=True, line_terminator="\n"
516+
self._handler,
517+
sep=",",
518+
index=True,
519+
line_terminator="\n",
520+
encoding=self.encoding,
509521
)
510522
self._handler.push()
511523

tests/test_core.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,60 @@ def test_delete_rows_truncate_int_both_none(self):
12391239

12401240
pd.testing.assert_frame_equal(df_out, df_expect)
12411241

1242+
def test_csv_parsing_matches_pandas(self):
1243+
header_names = [
1244+
"OrganizationName",
1245+
"timestamp",
1246+
"timestamp_end",
1247+
"dcount_ExternalId",
1248+
"serviceAccount",
1249+
]
1250+
1251+
file_name = Path(__file__).parent / "testdata/drio_sdk_usage_mod.csv"
1252+
1253+
headers = {header: str for header in header_names}
1254+
handler = LocalFileHandler(file_name)
1255+
collector = ResultCollector(headers, handler=handler)
1256+
collector.pull(raise_on_missing=True, strict=True)
1257+
df = collector.dataframe
1258+
1259+
df_expected = pd.read_csv(
1260+
file_name, index_col=0, encoding="utf-8", dtype=headers
1261+
)
1262+
1263+
assert (
1264+
df_expected.iloc[-1]["dcount_ExternalId"]
1265+
== df.iloc[-1]["dcount_ExternalId"]
1266+
)
1267+
1268+
assert df_expected.iloc[0]["OrganizationName"] == df.iloc[0]["OrganizationName"]
1269+
1270+
def test_parsing_norwegian_letters(self):
1271+
header_names = [
1272+
"OrganizationName",
1273+
"timestamp",
1274+
"timestamp_end",
1275+
"dcount_ExternalId",
1276+
"serviceAccount",
1277+
]
1278+
1279+
file_name = Path(__file__).parent / "testdata/drio_sdk_usage_mod2.csv"
1280+
1281+
headers = {header: str for header in header_names}
1282+
handler = LocalFileHandler(file_name)
1283+
collector = ResultCollector(headers, handler=handler)
1284+
collector.pull(raise_on_missing=True, strict=True)
1285+
df = collector.dataframe
1286+
1287+
df_expected = pd.read_csv(
1288+
file_name,
1289+
index_col=0,
1290+
dtype=headers,
1291+
encoding="utf-8",
1292+
)
1293+
1294+
assert df_expected.iloc[-1]["serviceAccount"] == df.iloc[-1]["serviceAccount"]
1295+
12421296

12431297
def test__build_download_url(previous_file_names):
12441298
app_id = "12345"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
,OrganizationName,timestamp,timestamp_end,dcount_ExternalId,serviceAccount
2+
851,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
3+
855,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
4+
873,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
5+
874,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
6+
879,4Subsea,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False
7+
880,Unknown,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
,OrganizationName,timestamp,timestamp_end,dcount_ExternalId,serviceAccount
2+
851,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
3+
855,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
4+
873,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
5+
874,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
6+
879,4Subsea,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False
7+
880,Unknown,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False

0 commit comments

Comments
 (0)