@@ -167,10 +167,32 @@ def get_current_commit() -> str:
167167 return "unknown"
168168
169169
170+ def _get_storage_folder () -> Path :
171+ """Resolve the installed package's STORAGE_FOLDER path.
172+
173+ This is where Dataset classes (CPS_2024, etc.) look for H5 files.
174+ In an editable install it matches the source tree; in a regular
175+ install it's inside .venv/lib/.../site-packages/.
176+ """
177+ try :
178+ from policyengine_us_data .storage import STORAGE_FOLDER
179+
180+ return Path (STORAGE_FOLDER )
181+ except ImportError :
182+ # Fallback if package not importable (shouldn't happen in
183+ # the Modal image, but safe for local dev)
184+ return Path ("policyengine_us_data/storage" )
185+
186+
170187def get_checkpoint_path (branch : str , output_file : str ) -> Path :
171- """Get the checkpoint path for an output file, scoped by branch and commit."""
188+ """Get the checkpoint path for an output file, scoped by branch and commit.
189+
190+ Preserves the relative path structure to avoid filename collisions
191+ (e.g., calibration/policy_data.db stays distinct from policy_data.db).
192+ """
172193 commit = get_current_commit ()
173- return Path (VOLUME_MOUNT ) / branch / commit / Path (output_file ).name
194+ # Use the relative path as-is (not just filename) to avoid collisions
195+ return Path (VOLUME_MOUNT ) / branch / commit / output_file
174196
175197
176198def is_checkpointed (branch : str , output_file : str ) -> bool :
@@ -183,13 +205,44 @@ def is_checkpointed(branch: str, output_file: str) -> bool:
183205 return False
184206
185207
208+ def _resolve_local_path (output_file : str ) -> Path :
209+ """Resolve where a checkpointed file should be restored to.
210+
211+ Maps the relative source-tree path to the installed package's
212+ STORAGE_FOLDER so that Dataset classes can find the files.
213+ """
214+ output_path = Path (output_file )
215+ storage_folder = _get_storage_folder ()
216+
217+ # Files under policyengine_us_data/storage/ get mapped to
218+ # the installed package's STORAGE_FOLDER
219+ storage_prefix = Path ("policyengine_us_data/storage" )
220+ try :
221+ relative = output_path .relative_to (storage_prefix )
222+ return storage_folder / relative
223+ except ValueError :
224+ # Not under storage/ — use the path as-is (relative to cwd)
225+ return output_path
226+
227+
186228def restore_from_checkpoint (branch : str , output_file : str ) -> bool :
187- """Restore output file from checkpoint volume if it exists."""
229+ """Restore output file from checkpoint volume to STORAGE_FOLDER.
230+
231+ Writes to the installed package's storage directory so that
232+ Dataset classes (which use STORAGE_FOLDER) can find the files.
233+ """
188234 checkpoint_path = get_checkpoint_path (branch , output_file )
189235 if checkpoint_path .exists () and checkpoint_path .stat ().st_size > 0 :
190- local_path = Path (output_file )
236+ local_path = _resolve_local_path (output_file )
191237 local_path .parent .mkdir (parents = True , exist_ok = True )
192238 shutil .copy2 (checkpoint_path , local_path )
239+ # Also restore to the source-tree relative path so that
240+ # scripts run via subprocess (which use cwd-relative paths)
241+ # can find the file.
242+ source_path = Path (output_file )
243+ if source_path != local_path :
244+ source_path .parent .mkdir (parents = True , exist_ok = True )
245+ shutil .copy2 (checkpoint_path , source_path )
193246 print (f"Restored from checkpoint: { output_file } " )
194247 return True
195248 return False
@@ -200,12 +253,24 @@ def save_checkpoint(
200253 output_file : str ,
201254 volume : modal .Volume ,
202255) -> None :
203- """Save output file to checkpoint volume."""
204- local_path = Path (output_file )
256+ """Save output file to checkpoint volume.
257+
258+ Checks both the installed package path and the source-tree
259+ relative path to find the file.
260+ """
261+ local_path = _resolve_local_path (output_file )
262+ source_path = Path (output_file )
263+ # Try installed path first, fall back to source-tree path
264+ actual_path = None
205265 if local_path .exists () and local_path .stat ().st_size > 0 :
266+ actual_path = local_path
267+ elif source_path .exists () and source_path .stat ().st_size > 0 :
268+ actual_path = source_path
269+
270+ if actual_path :
206271 checkpoint_path = get_checkpoint_path (branch , output_file )
207272 checkpoint_path .parent .mkdir (parents = True , exist_ok = True )
208- shutil .copy2 (local_path , checkpoint_path )
273+ shutil .copy2 (actual_path , checkpoint_path )
209274 with _volume_lock :
210275 volume .commit ()
211276 print (f"Checkpointed: { output_file } " )
@@ -734,6 +799,9 @@ def run_single_script(
734799 setup_gcp_credentials ()
735800 os .chdir ("/root/policyengine-us-data" )
736801
802+ # Reload volume to see writes from prior --script containers
803+ checkpoint_volume .reload ()
804+
737805 # Resolve short name to full path
738806 script_path = SCRIPT_SHORT_NAMES .get (script_name , script_name )
739807
@@ -822,6 +890,9 @@ def run_integration_test(
822890 setup_gcp_credentials ()
823891 os .chdir ("/root/policyengine-us-data" )
824892
893+ # Reload volume to see writes from prior containers
894+ checkpoint_volume .reload ()
895+
825896 # Restore all prerequisites and dataset outputs
826897 for prereq in PREREQUISITE_FILES :
827898 restore_from_checkpoint (branch , prereq )
0 commit comments