4Subsea · branislav-jenco-4ss · Jan 28, 2026 · Jan 5, 2026 · Jan 5, 2026 · Jan 5, 2026
diff --git a/fourinsight/engineroom/utils/_core.py b/fourinsight/engineroom/utils/_core.py
@@ -143,11 +143,16 @@ def __repr__(self):
         return f"LocalFileHandler {self._path.resolve()}"
 
     def _pull(self):
-        return self.write(open(self._path, mode="r").read())
+        # Read and close the file first, then replace the stream content; returns None.
+        with open(self._path, mode="r", encoding=self.encoding) as f:
+            content = f.read()
+        self.seek(0)
+        self.truncate(0)
+        self.write(content)  # NOTE(review): write() count no longer returned; confirm
 
     def _push(self):
         self._path.parent.mkdir(parents=True, exist_ok=True)
-        with open(self._path, mode="w") as f:
+        with open(self._path, mode="w", encoding=self.encoding) as f:  # same encoding _pull reads with
             f.write(self.getvalue())
 
 
@@ -343,6 +348,7 @@ def __init__(self, headers, handler=None, indexing_mode="auto"):
             raise ValueError("Indexing mode must be 'auto' or 'timestamp'.")
 
         self._dataframe = pd.DataFrame(columns=headers.keys()).astype(self._headers)
+        self.encoding = getattr(self._handler, "encoding", "utf-8")
 
     def __repr__(self):
         return repr(self._dataframe)
@@ -470,6 +476,7 @@ def pull(self, raise_on_missing=True, strict=True):
             parse_dates=True,
             dtype=self._headers,
             date_format="ISO8601",
+            encoding=self.encoding,
         )
 
         if strict and set(df_source.columns) != set(self._headers.keys()):
@@ -501,11 +508,19 @@ def push(self):
         self._handler.truncate()
         try:
             self._dataframe.to_csv(
-                self._handler, sep=",", index=True, lineterminator="\n"
+                self._handler,
+                sep=",",
+                index=True,
+                lineterminator="\n",
+                encoding=self.encoding,
             )
         except TypeError:  # for backward compatibility (remove after 2024-06-01)
             self._dataframe.to_csv(
-                self._handler, sep=",", index=True, line_terminator="\n"
+                self._handler,
+                sep=",",
+                index=True,
+                line_terminator="\n",
+                encoding=self.encoding,
             )
         self._handler.push()
 

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -1239,6 +1239,60 @@ def test_delete_rows_truncate_int_both_none(self):
 
         pd.testing.assert_frame_equal(df_out, df_expect)
 
+    def test_csv_parsing_matches_pandas(self):
+        """Pulled CSV cells must equal what a plain UTF-8 ``pd.read_csv`` yields."""
+        csv_path = Path(__file__).parent / "testdata/drio_sdk_usage_mod.csv"
+        headers = dict.fromkeys(
+            (
+                "OrganizationName",
+                "timestamp",
+                "timestamp_end",
+                "dcount_ExternalId",
+                "serviceAccount",
+            ),
+            str,
+        )
+
+        # Route the file through the handler/collector pair under test.
+        collector = ResultCollector(headers, handler=LocalFileHandler(csv_path))
+        collector.pull(raise_on_missing=True, strict=True)
+        actual = collector.dataframe
+
+        # Reference parse straight from disk with an explicit encoding.
+        expected = pd.read_csv(csv_path, index_col=0, encoding="utf-8", dtype=headers)
+
+        last_expected, last_actual = expected.iloc[-1], actual.iloc[-1]
+        assert last_expected["dcount_ExternalId"] == last_actual["dcount_ExternalId"]
+
+        first_expected, first_actual = expected.iloc[0], actual.iloc[0]
+        assert first_expected["OrganizationName"] == first_actual["OrganizationName"]
+
+    def test_parsing_norwegian_letters(self):
+        """Pull a fixture containing Norwegian letters and compare with pandas."""
+        csv_path = Path(__file__).parent / "testdata/drio_sdk_usage_mod2.csv"
+        column_names = (
+            "OrganizationName",
+            "timestamp",
+            "timestamp_end",
+            "dcount_ExternalId",
+            "serviceAccount",
+        )
+        headers = dict.fromkeys(column_names, str)
+
+        # Load the file through the handler/collector pair under test.
+        collector = ResultCollector(headers, handler=LocalFileHandler(csv_path))
+        collector.pull(raise_on_missing=True, strict=True)
+        actual = collector.dataframe
+
+        reference = pd.read_csv(
+            csv_path, index_col=0, dtype=headers, encoding="utf-8"
+        )
+
+        # Only the final row's ``serviceAccount`` cell is compared.
+        expected_cell = reference.iloc[-1]["serviceAccount"]
+        actual_cell = actual.iloc[-1]["serviceAccount"]
+        assert expected_cell == actual_cell
+
 
 def test__build_download_url(previous_file_names):
     app_id = "12345"

diff --git a/tests/testdata/drio_sdk_usage_mod.csv b/tests/testdata/drio_sdk_usage_mod.csv
@@ -0,0 +1,7 @@
+,OrganizationName,timestamp,timestamp_end,dcount_ExternalId,serviceAccount
+851,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
+855,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
+873,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
+874,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
+879,4Subsea,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False
+880,Unknown,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,
diff --git a/tests/testdata/drio_sdk_usage_mod2.csv b/tests/testdata/drio_sdk_usage_mod2.csv
@@ -0,0 +1,7 @@
+,OrganizationName,timestamp,timestamp_end,dcount_ExternalId,serviceAccount
+851,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
+855,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
+873,Subsea 7,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,7,False
+874,Vår Energi,2025-12-01 00:00:00+00:00,2025-12-31 00:00:00+00:00,1,False
+879,4Subsea,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False
+880,Unknown,2026-01-01 00:00:00+00:00,2026-01-31 00:00:00+00:00,1,False