@@ -197,6 +197,138 @@ def test_file_output_out_update_empty_file_exists(self):
197197 df = pd .read_csv (os .path .join (self .temp_dir , "test.csv" ))
198198 self .assertEqual (len (df ), 1 )
199199
def test_file_output_out_append_no_gpu_consistent_columns(self):
    """Repeated appends of a GPU-less record must keep the CSV schema stable.

    Regression test for records whose ``gpu_count`` and ``gpu_model`` are
    both ``None``.

    Background: ``dropna(axis=1, how="all")`` used to be applied to the
    DataFrame read back from the *existing* CSV as well as to the new row.
    On a CPU-only machine those two columns are NaN in every row, so the
    second write silently removed them; the third write then detected a
    header mismatch and backed the file up.

    The test writes the same record four times and checks that the headers
    stay valid after every write, that no ``.bak`` file appears, that all
    four rows are stored, and that both GPU columns are still present.
    """
    # Field values describing a run on a machine without any GPU.
    record = {
        "timestamp": "2023-01-01T00:00:00",
        "project_name": "test_project",
        "run_id": "test_run_id",
        "experiment_id": "test_experiment_id",
        "duration": 10,
        "emissions": 0.5,
        "emissions_rate": 0.05,
        "cpu_power": 20,
        "gpu_power": 0,
        "ram_power": 5,
        "cpu_energy": 200,
        "gpu_energy": 0,
        "ram_energy": 50,
        "energy_consumed": 250,
        "water_consumed": 0.1,
        "country_name": "Testland",
        "country_iso_code": "TS",
        "region": "Test Region",
        "cloud_provider": "",
        "cloud_region": "",
        "os": "TestOS",
        "python_version": "3.8",
        "codecarbon_version": "2.0",
        "cpu_count": 4,
        "cpu_model": "Test CPU",
        "gpu_count": None,
        "gpu_model": None,
        "longitude": 0,
        "latitude": 0,
        "ram_total_size": 16,
        "tracking_mode": "machine",
    }
    cpu_only_emissions = EmissionsData(**record)
    writer = FileOutput("test.csv", self.temp_dir, on_csv_write="append")

    header_failure = (
        "CSV headers became invalid after an append (gpu_count/gpu_model "
        "columns were dropped by dropna)."
    )

    # Four writes: before the fix, the third one triggered a backup.
    for _ in range(4):
        writer.out(cpu_only_emissions, None)
        headers_ok = writer.has_valid_headers(cpu_only_emissions)
        self.assertTrue(headers_ok, header_failure)

    # The schema never changed, so no backup file may exist.
    backup_path = writer.save_file_path + ".bak"
    backup_found = os.path.exists(backup_path)
    self.assertFalse(
        backup_found,
        "A backup file was created even though the CSV schema did not change.",
    )

    # All four rows must be present.
    written = pd.read_csv(writer.save_file_path)
    self.assertEqual(len(written), 4)

    # Both GPU columns must still be present (as NaN).
    for column in ("gpu_count", "gpu_model"):
        self.assertIn(column, written.columns)
268+
def test_file_output_out_append_no_gpu_zero_defaults(self):
    """Successive appends with ``gpu_count=0`` / ``gpu_model=""`` keep columns stable.

    These values are the new tracker defaults for CPU-only machines. The
    same record is appended four times; after every write the headers must
    still be valid, no ``.bak`` backup may exist afterwards, and the
    resulting CSV must hold four rows with both GPU columns present.
    """
    no_gpu_data = EmissionsData(
        timestamp="2023-01-01T00:00:00",
        project_name="test_project",
        run_id="test_run_id",
        experiment_id="test_experiment_id",
        duration=10,
        emissions=0.5,
        emissions_rate=0.05,
        cpu_power=20,
        gpu_power=0,
        ram_power=5,
        cpu_energy=200,
        gpu_energy=0,
        ram_energy=50,
        energy_consumed=250,
        water_consumed=0.1,
        country_name="Testland",
        country_iso_code="TS",
        region="Test Region",
        cloud_provider="",
        cloud_region="",
        os="TestOS",
        python_version="3.8",
        codecarbon_version="2.0",
        cpu_count=4,
        cpu_model="Test CPU",
        gpu_count=0,
        gpu_model="",
        longitude=0,
        latitude=0,
        ram_total_size=16,
        tracking_mode="machine",
    )

    file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append")

    # Re-check the headers after every write so a schema drift is caught
    # at the write that introduces it.
    for _ in range(4):
        file_output.out(no_gpu_data, None)
        self.assertTrue(
            file_output.has_valid_headers(no_gpu_data),
            "CSV headers should remain consistent with gpu_count=0 / gpu_model=''.",
        )

    # A backup is only expected when the schema changes, which must not happen here.
    bak_path = file_output.save_file_path + ".bak"
    self.assertFalse(
        os.path.exists(bak_path),
        "No backup should be created when columns are consistent.",
    )

    df = pd.read_csv(file_output.save_file_path)
    self.assertEqual(len(df), 4)

    # With 0/"" defaults, gpu_count should be 0 (not NaN).
    self.assertIn("gpu_count", df.columns)
    self.assertTrue((df["gpu_count"] == 0).all())

    # gpu_model="" is read back as NaN by pandas (empty string in CSV),
    # so only the presence of the column is asserted, and only once.
    self.assertIn("gpu_model", df.columns)
331+
200332 def test_file_output_task_out (self ):
201333 task_emissions_data = [
202334 TaskEmissionsData (
0 commit comments