Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions fourinsight/engineroom/utils/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,16 @@ def __repr__(self):
return f"LocalFileHandler {self._path.resolve()}"

def _pull(self):
return self.write(open(self._path, mode="r").read())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This return, which was removed, made the truncate function in the base class truncate differently.

# return self.write(open(self._path, mode="r", encoding=self.encoding).read())
Comment thread
heidi-holm-4ss marked this conversation as resolved.
Outdated
Comment thread
bjorn-einar-bjartnes-4ss marked this conversation as resolved.
Outdated
with open(self._path, mode="r", encoding=self.encoding) as f:
content = f.read()
self.seek(0)
self.truncate(0)
self.write(content)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is where the return was lost


def _push(self):
self._path.parent.mkdir(parents=True, exist_ok=True)
with open(self._path, mode="w") as f:
with open(self._path, mode="w", encoding=self.encoding) as f:
f.write(self.getvalue())


Expand Down Expand Up @@ -343,6 +348,7 @@ def __init__(self, headers, handler=None, indexing_mode="auto"):
raise ValueError("Indexing mode must be 'auto' or 'timestamp'.")

self._dataframe = pd.DataFrame(columns=headers.keys()).astype(self._headers)
self.encoding = getattr(self._handler, "encoding", "utf-8")

def __repr__(self):
return repr(self._dataframe)
Expand Down Expand Up @@ -470,6 +476,7 @@ def pull(self, raise_on_missing=True, strict=True):
parse_dates=True,
dtype=self._headers,
date_format="ISO8601",
encoding=self.encoding,
)

if strict and set(df_source.columns) != set(self._headers.keys()):
Expand Down Expand Up @@ -501,11 +508,19 @@ def push(self):
self._handler.truncate()
try:
self._dataframe.to_csv(
self._handler, sep=",", index=True, lineterminator="\n"
self._handler,
sep=",",
index=True,
lineterminator="\n",
encoding=self.encoding,
)
except TypeError: # for backward compatibility (remove after 2024-06-01)
self._dataframe.to_csv(
self._handler, sep=",", index=True, line_terminator="\n"
self._handler,
sep=",",
index=True,
line_terminator="\n",
encoding=self.encoding,
)
self._handler.push()

Expand Down
54 changes: 54 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1239,6 +1239,60 @@ def test_delete_rows_truncate_int_both_none(self):

pd.testing.assert_frame_equal(df_out, df_expect)

def test_csv_parsing_matches_pandas(self):
    """Check that a pulled CSV agrees with a direct ``pd.read_csv`` load.

    The fixture ``testdata/drio_sdk_usage_mod.csv`` is pulled through a
    ``ResultCollector`` backed by a ``LocalFileHandler`` and, separately,
    read with ``pd.read_csv`` using UTF-8. Two cells are compared: the last
    row's ``dcount_ExternalId`` and the first row's ``OrganizationName``.
    """
    csv_path = Path(__file__).parent / "testdata/drio_sdk_usage_mod.csv"

    column_names = (
        "OrganizationName",
        "timestamp",
        "timestamp_end",
        "dcount_ExternalId",
        "serviceAccount",
    )
    # Every column is read as ``str`` by both loaders.
    headers = dict.fromkeys(column_names, str)

    collector = ResultCollector(headers, handler=LocalFileHandler(csv_path))
    collector.pull(raise_on_missing=True, strict=True)
    df = collector.dataframe

    df_expected = pd.read_csv(
        csv_path, index_col=0, encoding="utf-8", dtype=headers
    )

    # Last row, numeric-looking column.
    expected_count = df_expected.iloc[-1]["dcount_ExternalId"]
    actual_count = df.iloc[-1]["dcount_ExternalId"]
    assert expected_count == actual_count

    # First row, column containing non-ASCII text in the fixture.
    expected_org = df_expected.iloc[0]["OrganizationName"]
    actual_org = df.iloc[0]["OrganizationName"]
    assert expected_org == actual_org

def test_parsing_norwegian_letters(self):
    """Check the last ``serviceAccount`` cell after pulling a non-ASCII CSV.

    The fixture ``testdata/drio_sdk_usage_mod2.csv`` is pulled through a
    ``ResultCollector`` backed by a ``LocalFileHandler`` and, separately,
    read with ``pd.read_csv`` using UTF-8. The last row's ``serviceAccount``
    value must be equal in both frames.
    """
    csv_path = Path(__file__).parent / "testdata/drio_sdk_usage_mod2.csv"

    column_names = (
        "OrganizationName",
        "timestamp",
        "timestamp_end",
        "dcount_ExternalId",
        "serviceAccount",
    )
    # Every column is read as ``str`` by both loaders.
    headers = dict.fromkeys(column_names, str)

    collector = ResultCollector(headers, handler=LocalFileHandler(csv_path))
    collector.pull(raise_on_missing=True, strict=True)
    df = collector.dataframe

    df_expected = pd.read_csv(
        csv_path,
        index_col=0,
        dtype=headers,
        encoding="utf-8",
    )

    expected_flag = df_expected.iloc[-1]["serviceAccount"]
    actual_flag = df.iloc[-1]["serviceAccount"]
    assert expected_flag == actual_flag


def test__build_download_url(previous_file_names):
app_id = "12345"
Expand Down
7 changes: 7 additions & 0 deletions tests/testdata/drio_sdk_usage_mod.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
,OrganizationName,timestamp,timestamp_end,dcount_ExternalId,serviceAccount
851,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
855,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
873,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
874,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
879,4Subsea,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False
880,Unknown,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,
7 changes: 7 additions & 0 deletions tests/testdata/drio_sdk_usage_mod2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
,OrganizationName,timestamp,timestamp_end,dcount_ExternalId,serviceAccount
851,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
855,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
873,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
874,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
879,4Subsea,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False
880,Unknown,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False