Made review changes

oree-xx · oree-xx · commit fb29e35949dc · 2026-02-16T11:17:54.000+01:00
diff --git a/scripts/1-fetch/gcs_fetch.py b/scripts/1-fetch/gcs_fetch.py
@@ -99,25 +99,14 @@ def get_search_service():
     )
 
 
-def initialize_data_file(file_path, header):
-    if not os.path.isfile(file_path):
-        with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
-            writer = csv.DictWriter(
-                file_obj, fieldnames=header, dialect="unix"
-            )
-            writer.writeheader()
-
-
 def initialize_all_data_files(args):
-    if not args.enable_save:
-        return
-
-    # Create data directory for this phase
-    os.makedirs(PATHS["data_phase"], exist_ok=True)
-
-    initialize_data_file(FILE1_COUNT, HEADER1_COUNT)
-    initialize_data_file(FILE2_LANGUAGE, HEADER2_LANGUAGE)
-    initialize_data_file(FILE3_COUNTRY, HEADER3_COUNTRY)
+    for file_path, header in [
+        (FILE1_COUNT, HEADER1_COUNT),
+        (FILE2_LANGUAGE, HEADER2_LANGUAGE),
+        (FILE3_COUNTRY, HEADER3_COUNTRY),
+    ]:
+        if not os.path.isfile(file_path):
+            shared.rows_to_csv(args, file_path, header, [])
 
 
 def get_last_completed_plan_index():
@@ -150,8 +139,6 @@ def load_plan():
 
 
 def append_data(args, plan_row, index, count):
-    if not args.enable_save:
-        return
     if plan_row["COUNTRY"]:
         file_path = FILE3_COUNTRY
         fieldnames = HEADER3_COUNTRY
@@ -178,11 +165,7 @@ def append_data(args, plan_row, index, count):
             "TOOL_IDENTIFIER": plan_row["TOOL_IDENTIFIER"],
             "COUNT": count,
         }
-    with open(file_path, "a", encoding="utf-8", newline="\n") as file_obj:
-        writer = csv.DictWriter(
-            file_obj, fieldnames=fieldnames, dialect="unix"
-        )
-        writer.writerow(row)
+    shared.rows_to_csv(args, file_path, fieldnames, [row], append=True)
 
 
 def query_gcs(args, service, last_completed_plan_index, plan):
diff --git a/scripts/1-fetch/github_fetch.py b/scripts/1-fetch/github_fetch.py
@@ -68,7 +68,7 @@ def parse_arguments():
 
 def check_for_completion():
     try:
-        with open(FILE_COUNT, "r", newline="") as file_obj:
+        with open(FILE_COUNT, "r", encoding="utf-8") as file_obj:
             reader = csv.DictReader(file_obj, dialect="unix")
             if len(list(reader)) == len(GITHUB_TOOLS):
                 raise shared.QuantifyingException(
diff --git a/scripts/1-fetch/smithsonian_fetch.py b/scripts/1-fetch/smithsonian_fetch.py
@@ -74,15 +74,15 @@ def check_for_completion():
     completed_units = False
 
     try:
-        with open(FILE_1_METRICS, "r", newline="") as file_obj:
+        with open(FILE_1_METRICS, "r", encoding="utf-8") as file_obj:
             reader = csv.DictReader(file_obj, dialect="unix")
             if len(list(reader)) > 0:
                 completed_metrics = True
     except FileNotFoundError:
         pass  # File may not be found without --enable-save, etc.
 
     try:
-        with open(FILE_2_UNITS, "r", newline="") as file_obj:
+        with open(FILE_2_UNITS, "r", encoding="utf-8") as file_obj:
             reader = csv.DictReader(file_obj, dialect="unix")
             if len(list(reader)) > 30:
                 completed_units = True
diff --git a/scripts/shared.py b/scripts/shared.py
@@ -233,20 +233,22 @@ def paths_list_update(logger, paths_list, old_quarter, new_quarter):
     return paths_list
 
 
-def rows_to_csv(args, file_path, fieldnames, rows):
+def rows_to_csv(args, file_path, fieldnames, rows, append=False):
     """Write rows to a CSV file if saving is enabled."""
     if not args.enable_save:
         return
 
     os.makedirs(os.path.dirname(file_path), exist_ok=True)
 
-    with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
+    mode = "a" if append else "w"
+    with open(file_path, mode, encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj,
             fieldnames=fieldnames,
             dialect="unix",
         )
-        writer.writeheader()
+        if not append:
+            writer.writeheader()
         for row in rows:
             writer.writerow(row)