Skip to content

Commit 5b8a47b

Browse files
committed
Sort CSV headers before comparing so that we don't spuriously create backup output files.
1 parent 82c9cfb commit 5b8a47b

2 files changed

Lines changed: 32 additions & 18 deletions

File tree

codecarbon/output_methods/file.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -68,10 +68,10 @@ def has_valid_headers(self, data: EmissionsData) -> bool:
6868
# No entries
6969
return True
7070
dict_from_csv = dict(csv_entries_list[0])
71-
list_of_column_names = list(dict_from_csv.keys())
72-
return list(data.values.keys()) == list_of_column_names
71+
list_of_column_names = sorted(dict_from_csv.keys())
72+
return sorted(data.values.keys()) == list_of_column_names
7373

74-
def out(self, total: EmissionsData, _: EmissionsData):
74+
def out(self, total: EmissionsData, _):
7575
"""
7676
Save the emissions data from a whole run to a CSV file.
7777

tests/output_methods/test_file.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import shutil
33
import tempfile
44
import unittest
5-
from unittest.mock import MagicMock, patch
5+
from unittest.mock import patch
66

77
import pandas as pd
88

@@ -66,7 +66,7 @@ def test_file_output_initialization_invalid_dir(self):
6666

6767
def test_has_valid_headers_success(self):
6868
file_output = FileOutput("test.csv", self.temp_dir)
69-
file_output.out(self.emissions_data, MagicMock())
69+
file_output.out(self.emissions_data, None)
7070

7171
self.assertTrue(file_output.has_valid_headers(self.emissions_data))
7272

@@ -77,9 +77,19 @@ def test_has_valid_headers_success_with_empty_file(self):
7777

7878
self.assertTrue(file_output.has_valid_headers(self.emissions_data))
7979

80+
def test_has_valid_headers_different_order_success(self):
81+
file_output = FileOutput("test.csv", self.temp_dir)
82+
file_output.out(self.emissions_data, None)
83+
84+
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
85+
df = df[list(reversed(df.columns))]
86+
df.to_csv(os.path.join(self.temp_dir, "test.csv"), index=False)
87+
88+
self.assertTrue(file_output.has_valid_headers(self.emissions_data))
89+
8090
def test_has_valid_headers_failure(self):
8191
file_output = FileOutput("test.csv", self.temp_dir)
82-
file_output.out(self.emissions_data, MagicMock())
92+
file_output.out(self.emissions_data, None)
8393

8494
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
8595
df.rename(columns={"wue": "new_header"}, inplace=True)
@@ -90,10 +100,10 @@ def test_has_valid_headers_failure(self):
90100
@patch("codecarbon.output_methods.file.FileOutput.has_valid_headers")
91101
def test_file_output_out_file_exists_invalid_headers(self, mock_has_valid_headers):
92102
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append")
93-
file_output.out(self.emissions_data, MagicMock())
103+
file_output.out(self.emissions_data, None)
94104

95105
mock_has_valid_headers.return_value = False
96-
file_output.out(self.emissions_data, MagicMock())
106+
file_output.out(self.emissions_data, None)
97107

98108
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv.bak"))
99109
self.assertEqual(len(df), 1)
@@ -102,63 +112,67 @@ def test_file_output_out_file_exists_invalid_headers(self, mock_has_valid_header
102112

103113
def test_file_output_out_update_no_file_exists(self):
104114
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="update")
105-
file_output.out(self.emissions_data, MagicMock())
115+
file_output.out(self.emissions_data, None)
106116

107117
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
108118
self.assertEqual(len(df), 1)
109119

110120
def test_file_output_out_append_no_file_exists(self):
111121
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append")
112-
file_output.out(self.emissions_data, MagicMock())
122+
file_output.out(self.emissions_data, None)
113123

114124
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
115125
self.assertEqual(len(df), 1)
116126

117127
def test_file_output_out_append_file_exists(self):
118128
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append")
119-
file_output.out(self.emissions_data, MagicMock())
120-
file_output.out(self.emissions_data, MagicMock())
129+
file_output.out(self.emissions_data, None)
130+
file_output.out(self.emissions_data, None)
121131

122132
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
123133
self.assertEqual(len(df), 2)
124134

125135
def test_file_output_out_update_file_exists_no_matching_row(self):
126136
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="update")
127-
file_output.out(self.emissions_data, MagicMock())
137+
file_output.out(self.emissions_data, None)
128138

129139
updated_emissions_data = self.emissions_data
130140
updated_emissions_data.run_id = "new_test_run_id"
131-
file_output.out(updated_emissions_data, MagicMock())
141+
file_output.out(updated_emissions_data, None)
132142

133143
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
134144
self.assertEqual(len(df), 2)
135145

136146
def test_file_output_out_update_file_exists_multiple_matching_rows(self):
137147
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="update")
138-
file_output.out(self.emissions_data, MagicMock())
148+
file_output.out(self.emissions_data, None)
139149

140150
# Manually add a duplicate row to simulate the condition
141151
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
142152
df = pd.concat([df, df])
143153
df.to_csv(os.path.join(self.temp_dir, "test.csv"), index=False)
144154

145-
file_output.out(self.emissions_data, MagicMock())
155+
file_output.out(self.emissions_data, None)
146156

147157
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
148158
self.assertEqual(len(df), 3)
149159

150160
def test_file_output_out_update_file_exists_one_matchingrows(self):
151161
file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="update")
152-
file_output.out(self.emissions_data, MagicMock())
162+
file_output.out(self.emissions_data, None)
153163
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
154164
self.assertEqual(df["cpu_power"].iloc[0], 20)
155165

156166
new_data = self.emissions_data
157167
new_data.cpu_power = 2
158-
file_output.out(new_data, MagicMock())
168+
file_output.out(new_data, None)
159169
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
160170
self.assertEqual(df["cpu_power"].iloc[0], 2)
161171

172+
# def test_file_output_out_consistent_column_ordering(self):
173+
# file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append")
174+
# file_output.out(self.emissions_data, None)
175+
162176
def test_file_output_task_out(self):
163177
task_emissions_data = [
164178
TaskEmissionsData(

0 commit comments

Comments
 (0)