Skip to content

Commit 7bce6bf

Browse files
Route CLI status messages to stderr (#926)
Closes #858.

Dataset-setup notices, microsim "Running" / "completed" lines, and the "Results saved to ..." message were all going to stdout, contaminating piped CSV output in the default `policyengine-taxsim < input > output` flow and in Daniel's `-o /dev/stdout` workflow. Send them to stderr instead, matching the existing tqdm progress bars.

Stdout now contains only the CSV results; status output (including tqdm) is exclusively on stderr. Users wanting full silence can redirect stderr to /dev/null (`2>/dev/null`).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3d19bac commit 7bce6bf

4 files changed

Lines changed: 15 additions & 9 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Route CLI status messages (dataset setup, microsim progress, "Results saved") to stderr so they don't contaminate piped CSV output on stdout.

policyengine_taxsim/cli.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,13 +197,13 @@ def policyengine(input_file, output, logs, disable_salt, assume_w2_wages, sample
197197

198198
# Generate YAML files if requested
199199
if logs:
200-
click.echo("Generating PolicyEngine YAML test files...")
200+
click.echo("Generating PolicyEngine YAML test files...", err=True)
201201
_generate_yaml_files(df_with_ids, results_df)
202-
click.echo(f"Generated {len(df_with_ids)} YAML test files")
202+
click.echo(f"Generated {len(df_with_ids)} YAML test files", err=True)
203203

204204
# Save results to output file
205205
write_output(results_df, output)
206-
click.echo(f"Results saved to {output}")
206+
click.echo(f"Results saved to {output}", err=True)
207207

208208
except Exception as e:
209209
click.echo(f"Error processing input: {str(e)}", err=True)

policyengine_taxsim/runners/base_runner.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
import pandas as pd
24
from abc import ABC, abstractmethod
35
from typing import Optional, Union
@@ -103,14 +105,14 @@ def save_results(
103105
"""
104106
if results_df is None:
105107
if self.results is None:
106-
print("Running calculations to generate results...")
108+
print("Running calculations to generate results...", file=sys.stderr)
107109
results_df = self.run()
108110
else:
109111
results_df = self.results
110112

111113
output_path = Path(output_path)
112114
write_output(results_df, output_path)
113-
print(f"Results saved to: {output_path}")
115+
print(f"Results saved to: {output_path}", file=sys.stderr)
114116

115117
def get_record_count(self) -> int:
116118
"""Get number of records in input data"""

policyengine_taxsim/runners/policyengine_runner.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
import pandas as pd
24
import numpy as np
35
import tempfile
@@ -733,7 +735,7 @@ def generate(self) -> None:
733735
self.input_df = self._ensure_required_columns(self.input_df)
734736

735737
# Set defaults and convert TAXSIM32 format (vectorized)
736-
print("Setting defaults for TAXSIM records...")
738+
print("Setting defaults for TAXSIM records...", file=sys.stderr)
737739
self.input_df = self._apply_defaults_vectorized(self.input_df)
738740

739741
# Extract years (assuming all records might have different years)
@@ -749,7 +751,7 @@ def generate(self) -> None:
749751
data = self._initialize_dataset_structure()
750752

751753
# Process each year separately
752-
print("Processing years for dataset generation...")
754+
print("Processing years for dataset generation...", file=sys.stderr)
753755
for year in tqdm(unique_years, desc="Dataset generation by year"):
754756
year_mask = self.input_df["year"] == year
755757
year_data = self.input_df[year_mask].copy()
@@ -1005,7 +1007,8 @@ def run(self, show_progress: bool = True, on_progress=None) -> pd.DataFrame:
10051007
"""
10061008
if show_progress:
10071009
print(
1008-
f"Running PolicyEngine Microsimulation on {len(self.input_df)} records"
1010+
f"Running PolicyEngine Microsimulation on {len(self.input_df)} records",
1011+
file=sys.stderr,
10091012
)
10101013

10111014
# Ensure years are integers to handle decimal values like 2021.0
@@ -1043,7 +1046,7 @@ def run(self, show_progress: bool = True, on_progress=None) -> pd.DataFrame:
10431046
results_df = pd.concat(frames, ignore_index=True)
10441047

10451048
if show_progress:
1046-
print("PolicyEngine Microsimulation completed")
1049+
print("PolicyEngine Microsimulation completed", file=sys.stderr)
10471050

10481051
return results_df
10491052

0 commit comments

Comments
 (0)