Skip to content

Commit 8614bad

Browse files
authored
Merge pull request #846 from PolicyEngine/maria/indentation_error
Fix diagnostics upload script indentation
2 parents f65ac5a + 61623aa commit 8614bad

5 files changed

Lines changed: 80 additions & 17 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix the Modal pipeline diagnostics-upload helper so its generated `python -c` script is valid Python and covered by a unit test before deployment.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Enable all state- and district-level EITC and CTC targets in the default calibration target configuration.

modal_app/pipeline.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -416,23 +416,7 @@ def upload_run_diagnostics(
416416
result = subprocess.run(
417417
_python_cmd(
418418
"-c",
419-
f"""
420-
import json, os
421-
from huggingface_hub import HfApi
422-
423-
entries = json.loads('''{entries_json}''')
424-
api = HfApi()
425-
token = os.environ.get("HUGGING_FACE_TOKEN")
426-
for local_path, repo_path in entries:
427-
api.upload_file(
428-
path_or_fileobj=local_path,
429-
path_in_repo=repo_path,
430-
repo_id="policyengine/policyengine-us-data",
431-
repo_type="model",
432-
token=token,
433-
)
434-
print(f"Uploaded {{repo_path}}")
435-
""",
419+
_build_diagnostics_upload_script(entries_json),
436420
),
437421
cwd="/root/policyengine-us-data",
438422
capture_output=True,
@@ -444,6 +428,32 @@ def upload_run_diagnostics(
444428
print(f" {result.stdout.strip()}")
445429

446430

431+
def _build_diagnostics_upload_script(entries_json: str) -> str:
432+
"""Build the isolated diagnostics-upload script.
433+
434+
Keep this snippet syntactically self-contained: it is passed directly to
435+
``python -c`` inside the Modal orchestrator container.
436+
"""
437+
return f"""
438+
import json
439+
import os
440+
from huggingface_hub import HfApi
441+
442+
entries = json.loads({entries_json!r})
443+
api = HfApi()
444+
token = os.environ.get("HUGGING_FACE_TOKEN")
445+
for local_path, repo_path in entries:
446+
api.upload_file(
447+
path_or_fileobj=local_path,
448+
path_in_repo=repo_path,
449+
repo_id="policyengine/policyengine-us-data",
450+
repo_type="model",
451+
token=token,
452+
)
453+
print(f"Uploaded {{repo_path}}")
454+
"""
455+
456+
447457
@app.function(
448458
image=image,
449459
timeout=300,

policyengine_us_data/calibration/target_config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ include:
2626
geo_level: district
2727
- variable: taxable_pension_income
2828
geo_level: district
29+
- variable: eitc
30+
geo_level: district
2931
- variable: refundable_ctc
3032
geo_level: district
3133
- variable: non_refundable_ctc
@@ -58,6 +60,12 @@ include:
5860
- variable: spm_unit_count
5961
geo_level: state
6062
domain_variable: tanf
63+
- variable: eitc
64+
geo_level: state
65+
- variable: refundable_ctc
66+
geo_level: state
67+
- variable: non_refundable_ctc
68+
geo_level: state
6169

6270
# === STATE — ACA marketplace APTC and bronze-plan enrollment counts ===
6371
- variable: tax_unit_count

tests/unit/test_pipeline.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
"""Tests for pipeline orchestrator metadata and helpers."""
22

33
import json
4+
import sys
45
import time
6+
from types import ModuleType
57
from unittest.mock import MagicMock, patch
68

79
import pytest
@@ -10,6 +12,7 @@
1012

1113
from modal_app.pipeline import (
1214
RunMetadata,
15+
_build_diagnostics_upload_script,
1316
_step_completed,
1417
_record_step,
1518
generate_run_id,
@@ -259,3 +262,43 @@ def test_read_nonexistent_raises(self):
259262
):
260263
with pytest.raises(FileNotFoundError):
261264
read_run_meta("fake_run", mock_vol)
265+
266+
267+
def test_diagnostics_upload_script_is_valid_python(monkeypatch, capsys):
268+
entries = [
269+
(
270+
"/pipeline/runs/test/diagnostics/unified_diagnostics.csv",
271+
"calibration/runs/test/diagnostics/unified_diagnostics.csv",
272+
)
273+
]
274+
entries_json = json.dumps(entries)
275+
276+
script = _build_diagnostics_upload_script(entries_json)
277+
278+
compile(script, "<diagnostics-upload>", "exec")
279+
assert "\t" not in script
280+
assert "api.upload_file(" in script
281+
282+
calls = []
283+
284+
class FakeHfApi:
285+
def upload_file(self, **kwargs):
286+
calls.append(kwargs)
287+
288+
fake_hub = ModuleType("huggingface_hub")
289+
fake_hub.HfApi = FakeHfApi
290+
monkeypatch.setitem(sys.modules, "huggingface_hub", fake_hub)
291+
monkeypatch.setenv("HUGGING_FACE_TOKEN", "token")
292+
293+
exec(compile(script, "<diagnostics-upload>", "exec"), {})
294+
295+
assert calls == [
296+
{
297+
"path_or_fileobj": entries[0][0],
298+
"path_in_repo": entries[0][1],
299+
"repo_id": "policyengine/policyengine-us-data",
300+
"repo_type": "model",
301+
"token": "token",
302+
}
303+
]
304+
assert capsys.readouterr().out == f"Uploaded {entries[0][1]}\n"

0 commit comments

Comments
 (0)