Skip to content

Commit 0f8e4ed

Browse files
committed
Fix EmptyDataError when reading empty CSV files
Handle pandas.errors.EmptyDataError that occurs when pd.read_csv() encounters an existing but empty CSV file. This fix catches the error in both 'append' and 'update' modes and treats the empty file as if it doesn't exist, creating a new file with the current data. The fix prevents crashes when emission CSV files exist but are empty, which can happen if a previous write operation was interrupted. Added test cases for both append and update modes with empty files.
1 parent 78baaa7 commit 0f8e4ed

2 files changed

Lines changed: 42 additions & 2 deletions

File tree

codecarbon/output_methods/file.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,14 +96,30 @@ def out(self, total: EmissionsData, _):
9696
if not file_exists:
9797
df = new_df
9898
elif self.on_csv_write == "append":
99-
df = pd.read_csv(self.save_file_path)
99+
try:
100+
df = pd.read_csv(self.save_file_path)
101+
except pd.errors.EmptyDataError:
102+
logger.warning(
103+
f"File {self.save_file_path} exists but is empty. Creating new file."
104+
)
105+
df = new_df
106+
df.to_csv(self.save_file_path, index=False)
107+
return
100108
# Filter out empty or all-NA columns, to avoid warnings from Pandas,
101109
# see https://github.com/pandas-dev/pandas/issues/55928
102110
df = df.dropna(axis=1, how="all")
103111
new_df = new_df.dropna(axis=1, how="all")
104112
df = pd.concat([df, new_df])
105113
else:
106-
df = pd.read_csv(self.save_file_path)
114+
try:
115+
df = pd.read_csv(self.save_file_path)
116+
except pd.errors.EmptyDataError:
117+
logger.warning(
118+
f"File {self.save_file_path} exists but is empty. Creating new file."
119+
)
120+
df = new_df
121+
df.to_csv(self.save_file_path, index=False)
122+
return
107123
df_run = df.loc[df.run_id == total.run_id]
108124
if len(df_run) < 1:
109125
df = pd.concat([df, new_df])

tests/output_methods/test_file.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -173,6 +173,30 @@ def test_file_output_out_update_file_exists_one_matchingrows(self):
173173
# file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append")
174174
# file_output.out(self.emissions_data, None)
175175

176+
def test_file_output_out_append_empty_file_exists(self):
177+
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append")
178+
# Create an empty file
179+
with open(file_output.save_file_path, "w") as _:
180+
pass
181+
182+
# This should not raise an error
183+
file_output.out(self.emissions_data, None)
184+
185+
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
186+
self.assertEqual(len(df), 1)
187+
188+
def test_file_output_out_update_empty_file_exists(self):
189+
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="update")
190+
# Create an empty file
191+
with open(file_output.save_file_path, "w") as _:
192+
pass
193+
194+
# This should not raise an error
195+
file_output.out(self.emissions_data, None)
196+
197+
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
198+
self.assertEqual(len(df), 1)
199+
176200
def test_file_output_task_out(self):
177201
task_emissions_data = [
178202
TaskEmissionsData(

0 commit comments

Comments
 (0)