Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions codecarbon/core/resource_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ def set_GPU_tracking(self):
self.gpu_tracker = "pynvml"
else:
logger.info("No GPU found.")
self.tracker._conf.setdefault("gpu_count", 0)
self.tracker._conf.setdefault("gpu_model", "")

def set_CPU_GPU_ram_tracking(self):
"""
Expand Down
4 changes: 2 additions & 2 deletions codecarbon/emissions_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -904,8 +904,8 @@ def _prepare_emissions_data(self) -> EmissionsData:
os=self._conf.get("os"),
python_version=self._conf.get("python_version"),
codecarbon_version=self._conf.get("codecarbon_version"),
gpu_count=self._conf.get("gpu_count"),
gpu_model=self._conf.get("gpu_model"),
gpu_count=self._conf.get("gpu_count", 0),
gpu_model=self._conf.get("gpu_model", ""),
cpu_count=self._conf.get("cpu_count"),
cpu_model=self._conf.get("cpu_model"),
longitude=self._conf.get("longitude"),
Expand Down
11 changes: 4 additions & 7 deletions codecarbon/output_methods/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,8 @@ def out(self, total: EmissionsData, _):
df = new_df
elif self.on_csv_write == "append":
df = pd.read_csv(self.save_file_path)
# Filter out empty or all-NA columns, to avoid warnings from Pandas,
# Filter out empty or all-NA columns only from new_df, to avoid warnings from Pandas,
# see https://github.com/pandas-dev/pandas/issues/55928
df = df.dropna(axis=1, how="all")
new_df = new_df.dropna(axis=1, how="all")
df = pd.concat([df, new_df])
else:
Expand Down Expand Up @@ -134,19 +133,17 @@ def task_out(self, data: List[TaskEmissionsData], experiment_name: str):
"""
Save the emissions data from a single task in an experiment run to a CSV file.

Does not attempt to backup existing files or prevent ovewritting them.
Does not attempt to backup existing files or prevent overwriting them.
"""
run_id = data[0].run_id
save_task_file_path = os.path.join(
self.output_dir, "emissions_" + experiment_name + "_" + run_id + ".csv"
)
df = pd.DataFrame(columns=data[0].values.keys())
new_df = pd.DataFrame.from_records(
[dict(data_point.values) for data_point in data]
)
# Filter out empty or all-NA columns, to avoid warnings from Pandas
# Filter out empty or all-NA columns only from new_df, to avoid warnings from Pandas
# see https://github.com/pandas-dev/pandas/issues/55928
df = df.dropna(axis=1, how="all")
new_df = new_df.dropna(axis=1, how="all")
df = pd.concat([df, new_df], ignore_index=True)
df = new_df
df.to_csv(save_task_file_path, index=False)
132 changes: 132 additions & 0 deletions tests/output_methods/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,138 @@ def test_file_output_out_update_empty_file_exists(self):
df = pd.read_csv(os.path.join(self.temp_dir, "test.csv"))
self.assertEqual(len(df), 1)

def test_file_output_out_append_no_gpu_consistent_columns(self):
    """Regression test: appending GPU-less records must keep the CSV schema stable.

    Writes the same record (gpu_count=None, gpu_model=None) four times in
    "append" mode and checks after every write that the headers are still
    valid, then that no ``.bak`` file exists, that four rows were stored and
    that both GPU columns are still present.

    Background: dropna(axis=1, how="all") used to be applied to the DataFrame
    read back from the existing CSV as well as to the new row. With both GPU
    fields NaN in every row, those columns disappeared after the second
    write, so the third write saw a schema mismatch and backed the file up.
    """
    record_fields = dict(
        timestamp="2023-01-01T00:00:00",
        project_name="test_project",
        run_id="test_run_id",
        experiment_id="test_experiment_id",
        duration=10,
        emissions=0.5,
        emissions_rate=0.05,
        cpu_power=20,
        gpu_power=0,
        ram_power=5,
        cpu_energy=200,
        gpu_energy=0,
        ram_energy=50,
        energy_consumed=250,
        water_consumed=0.1,
        country_name="Testland",
        country_iso_code="TS",
        region="Test Region",
        cloud_provider="",
        cloud_region="",
        os="TestOS",
        python_version="3.8",
        codecarbon_version="2.0",
        cpu_count=4,
        cpu_model="Test CPU",
        # The two fields under test: no GPU information at all.
        gpu_count=None,
        gpu_model=None,
        longitude=0,
        latitude=0,
        ram_total_size=16,
        tracking_mode="machine",
    )
    cpu_only_record = EmissionsData(**record_fields)

    writer = FileOutput("test.csv", self.temp_dir, on_csv_write="append")

    # Four consecutive appends; before the fix the third one caused a backup.
    headers_broken_msg = (
        "CSV headers became invalid after an append (gpu_count/gpu_model "
        "columns were dropped by dropna)."
    )
    for _ in range(4):
        writer.out(cpu_only_record, None)
        self.assertTrue(
            writer.has_valid_headers(cpu_only_record),
            headers_broken_msg,
        )

    # The schema never changed, so no backup copy may exist.
    backup_path = writer.save_file_path + ".bak"
    self.assertFalse(
        os.path.exists(backup_path),
        "A backup file was created even though the CSV schema did not change.",
    )

    # Every write must have produced exactly one row.
    written = pd.read_csv(writer.save_file_path)
    self.assertEqual(len(written), 4)

    # Both GPU columns have to survive, even though they only hold NaN.
    for column in ("gpu_count", "gpu_model"):
        self.assertIn(column, written.columns)

def test_file_output_out_append_no_gpu_zero_defaults(self):
    """Test that gpu_count=0 and gpu_model="" (the new tracker defaults for
    CPU-only machines) produce consistent CSV columns across successive writes.

    The same record is appended four times; after each write the headers
    must still be valid. Afterwards no ``.bak`` file may exist, the CSV must
    hold four rows, both GPU columns must be present, and every gpu_count
    value must equal 0.
    """
    no_gpu_data = EmissionsData(
        timestamp="2023-01-01T00:00:00",
        project_name="test_project",
        run_id="test_run_id",
        experiment_id="test_experiment_id",
        duration=10,
        emissions=0.5,
        emissions_rate=0.05,
        cpu_power=20,
        gpu_power=0,
        ram_power=5,
        cpu_energy=200,
        gpu_energy=0,
        ram_energy=50,
        energy_consumed=250,
        water_consumed=0.1,
        country_name="Testland",
        country_iso_code="TS",
        region="Test Region",
        cloud_provider="",
        cloud_region="",
        os="TestOS",
        python_version="3.8",
        codecarbon_version="2.0",
        cpu_count=4,
        cpu_model="Test CPU",
        gpu_count=0,
        gpu_model="",
        longitude=0,
        latitude=0,
        ram_total_size=16,
        tracking_mode="machine",
    )

    file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append")

    for _ in range(4):
        file_output.out(no_gpu_data, None)
        self.assertTrue(
            file_output.has_valid_headers(no_gpu_data),
            "CSV headers should remain consistent with gpu_count=0 / gpu_model=''.",
        )

    bak_path = file_output.save_file_path + ".bak"
    self.assertFalse(
        os.path.exists(bak_path),
        "No backup should be created when columns are consistent.",
    )

    df = pd.read_csv(file_output.save_file_path)
    self.assertEqual(len(df), 4)
    self.assertIn("gpu_count", df.columns)
    # gpu_model="" is read back as NaN by pandas (empty string in CSV),
    # so only the presence of the column is checked, not its values.
    self.assertIn("gpu_model", df.columns)
    # With 0/"" defaults, gpu_count should be 0 (not NaN)
    self.assertTrue((df["gpu_count"] == 0).all())

def test_file_output_task_out(self):
task_emissions_data = [
TaskEmissionsData(
Expand Down
Loading