2626 compute_difference ,
2727)
2828from policyengine_core .simulations import Microsimulation
29- from policyengine_core .tools .hugging_face import download_huggingface_dataset
29+ from policyengine_core .tools .hugging_face import (
30+ download_huggingface_dataset ,
31+ )
32+ from policyengine_api .utils .hugging_face import get_latest_commit_tag
3033import h5py
3134
3235from policyengine_us import Microsimulation
3336from policyengine_uk import Microsimulation
3437import logging
38+ import huggingface_hub
3539
3640load_dotenv ()
3741
CPS = "hf://policyengine/policyengine-us-data/cps_2023.h5"
POOLED_CPS = "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5"

# Dataset locations, keyed first by country id and then by a short dataset
# name. The values start as the bare URL constants and are rewritten below
# to carry a version suffix.
datasets = {
    "uk": {
        "enhanced_frs": ENHANCED_FRS,
        "frs": FRS,
    },
    "us": {
        "enhanced_cps": ENHANCED_CPS,
        "cps": CPS,
        "pooled_cps": POOLED_CPS,
    },
}

# Version tags are resolved once, at import time, and reused both for the
# dataset URLs below and for the job metadata built elsewhere in this module.
# NOTE(review): get_latest_commit_tag presumably queries the Hugging Face Hub,
# which would make importing this module depend on network access - confirm.
us_dataset_version = get_latest_commit_tag(
    repo_id="policyengine/policyengine-us-data",
    repo_type="model",
)
uk_dataset_version = get_latest_commit_tag(
    repo_id="policyengine/policyengine-uk-data-private",
    repo_type="model",
)

# Append "@<version>" to every dataset URL. The separator must be a bare "@"
# with no surrounding or trailing whitespace, otherwise the whitespace ends up
# inside the URL string itself.
# NOTE(review): assumes the dataset loader understands a "<url>@<version>"
# suffix - confirm against the loader's documentation.
for _country, _version in (
    ("uk", uk_dataset_version),
    ("us", us_dataset_version),
):
    # Only existing keys are reassigned, so the dict size never changes while
    # it is being iterated.
    for _name, _url in datasets[_country].items():
        datasets[_country][_name] = f"{_url}@{_version}"


# Cross-checking against API v2 is enabled only when Google application
# credentials are configured in the environment.
check_against_api_v2 = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is not None
)
@@ -189,6 +221,12 @@ def run(
189221 time_period = time_period ,
190222 region = region ,
191223 dataset = dataset ,
224+ model_version = COUNTRY_PACKAGE_VERSIONS [country_id ],
225+ data_version = (
226+ uk_dataset_version
227+ if country_id == "uk"
228+ else us_dataset_version
229+ ),
192230 )
193231
194232 try :
@@ -454,7 +492,7 @@ def _create_simulation_uk(
454492
455493 simulation = CountryMicrosimulation (
456494 reform = reform ,
457- dataset = ENHANCED_FRS ,
495+ dataset = datasets [ "uk" ][ "enhanced_frs" ] ,
458496 )
459497 simulation .default_calculation_period = time_period
460498 if region != "uk" :
@@ -514,7 +552,7 @@ def _create_simulation_us(
514552 if dataset in DATASETS :
515553 print (f"Running simulation using { dataset } dataset" )
516554
517- sim_options ["dataset" ] = ENHANCED_CPS
555+ sim_options ["dataset" ] = datasets [ "us" ][ "enhanced_cps" ]
518556
519557 # Handle region settings
520558 if region != "us" :
@@ -526,7 +564,7 @@ def _create_simulation_us(
526564 if "dataset" in sim_options :
527565 filter_dataset = sim_options ["dataset" ]
528566 else :
529- filter_dataset = POOLED_CPS
567+ filter_dataset = datasets [ "us" ][ "pooled_cps" ]
530568
531569 # Run sim to filter by region
532570 region_sim = Microsimulation (
@@ -547,7 +585,7 @@ def _create_simulation_us(
547585 sim_options ["dataset" ] = df [state_code == region .upper ()]
548586
549587 if dataset == "default" and region == "us" :
550- sim_options ["dataset" ] = CPS
588+ sim_options ["dataset" ] = datasets [ "us" ][ "cps" ]
551589
552590 # Return completed simulation
553591 return Microsimulation (** sim_options )
@@ -723,6 +761,8 @@ def _setup_sim_options(
723761 dataset : str ,
724762 time_period : str ,
725763 scope : Literal ["macro" , "household" ] = "macro" ,
764+ model_version : str | None = None ,
765+ data_version : str | None = None ,
726766 ) -> dict [str , Any ]:
727767 """
728768 Set up the simulation options for the APIv2 job.
@@ -738,6 +778,8 @@ def _setup_sim_options(
738778 "data" : self ._setup_data (
739779 dataset = dataset , country_id = country_id , region = region
740780 ),
781+ "model_version" : model_version ,
782+ "data_version" : data_version ,
741783 }
742784
743785 def _setup_region (self , country_id : str , region : str ) -> str :
0 commit comments