Migrate to APIv2 for economic impacts (#2398)

nikhilwoodruff · web-flow · commit 5320bece3417 · 2025-04-25T15:34:25.000+01:00
* Migrate to APIv2 for economic impacts (fixes #2397)
* Re-add v1 impacts, but turn them off by default
* Lint
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    changed:
+    - Migrated economic impact calculations from APIv1 to APIv2.
diff --git a/gcp/policyengine_api/app.yaml b/gcp/policyengine_api/app.yaml
@@ -1,9 +1,9 @@
 runtime: custom
 env: flex
 resources:
-  cpu: 24
-  memory_gb: 128
-  disk_size_gb: 128
+  cpu: 4
+  memory_gb: 16
+  disk_size_gb: 64
 automatic_scaling:
   min_num_instances: 1
   max_num_instances: 1
diff --git a/policyengine_api/jobs/calculate_economy_simulation_job.py b/policyengine_api/jobs/calculate_economy_simulation_job.py
@@ -37,20 +37,18 @@
 CPS = "hf://policyengine/policyengine-us-data/cps_2023.h5"
 POOLED_CPS = "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5"
 
-check_against_api_v2 = (
-    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is not None
-)
+use_api_v2 = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is not None
 
-if not check_against_api_v2:
+if not use_api_v2:
     logging.warn(
-        "Didn't find any GOOGLE_APPLICATION_CREDENTIALS, so will not check results for matches against APIv2."
+        "Didn't find any GOOGLE_APPLICATION_CREDENTIALS, so will not use APIv2."
     )
 
 
 class CalculateEconomySimulationJob(BaseJob):
     def __init__(self):
         super().__init__()
-        if check_against_api_v2:
+        if use_api_v2:
             self.api_v2 = SimulationAPIv2()
 
     def run(
@@ -152,7 +150,7 @@ def run(
             comment("Computing baseline")
 
             # Kick off APIv2 job
-            if check_against_api_v2:
+            if use_api_v2:
                 input_data = {
                     "country": country_id,
                     "scope": "macro",
@@ -162,51 +160,35 @@ def run(
                 }
                 execution = self.api_v2.run(input_data)
 
-            # Compute baseline economy
-            baseline_economy = self._compute_economy(
-                country_id=country_id,
-                region=region,
-                dataset=dataset,
-                time_period=time_period,
-                options=options,
-                policy_json=baseline_policy,
-            )
-            comment("Computing reform")
-
-            # Compute reform economy
-            reform_economy = self._compute_economy(
-                country_id=country_id,
-                region=region,
-                dataset=dataset,
-                time_period=time_period,
-                options=options,
-                policy_json=reform_policy,
-            )
-
-            baseline_economy = baseline_economy["result"]
-            reform_economy = reform_economy["result"]
-            comment("Comparing baseline and reform")
-            impact = compare_economic_outputs(
-                baseline_economy, reform_economy, country_id=country_id
-            )
+                impact = self.api_v2.wait_for_completion(execution)
+            else:
+                # Compute baseline economy
+                baseline_economy = self._compute_economy(
+                    country_id=country_id,
+                    region=region,
+                    dataset=dataset,
+                    time_period=time_period,
+                    options=options,
+                    policy_json=baseline_policy,
+                )
+                comment("Computing reform")
 
-            # Wait for APIv2 job to complete
-            if check_against_api_v2:
-                result = self.api_v2.wait_for_completion(execution)
-                if result is None:
-                    print("APIv2 COMPARISON failed: result is not JSON.")
-                else:
-                    try:
-                        print(
-                            f"APIv2 COMPARISON: match={is_similar(result, json.loads(json.dumps(impact)))}"
-                        )
-                    except:
-                        print("APIv2 COMPARISON: ERROR COMPARING", result)
+                # Compute reform economy
+                reform_economy = self._compute_economy(
+                    country_id=country_id,
+                    region=region,
+                    dataset=dataset,
+                    time_period=time_period,
+                    options=options,
+                    policy_json=reform_policy,
+                )
 
-            if options.get("apiv2", False):
-                # If the APIv2 job was successful, use its result
-                if result is not None:
-                    impact = result
+                baseline_economy = baseline_economy["result"]
+                reform_economy = reform_economy["result"]
+                comment("Comparing baseline and reform")
+                impact = compare_economic_outputs(
+                    baseline_economy, reform_economy, country_id=country_id
+                )
 
             # Finally, update all reform impact rows with the same baseline and reform policy IDs
             reform_impacts_service.set_complete_reform_impact(
@@ -468,79 +450,6 @@ def _compute_cliff_impacts(self, simulation: Microsimulation) -> Dict:
         }
 
 
-def is_similar(x, y, parent_name: str = "") -> bool:
-    if x is None or x == {}:
-        if y is None or y == {}:
-            return True
-    # Handle None values
-    if x is None or y is None:
-        equal = x is y
-        if not equal:
-            print(f"Not equal: {x} vs {y} in {parent_name}")
-        return equal
-
-    # Handle different types
-    if type(x) != type(y):
-        if float in ((type(x), type(y))) and int in ((type(x), type(y))):
-            pass
-        else:
-            print(f"Different types: {type(x)} vs {type(y)} in {parent_name}")
-            return False
-
-    # Handle numeric values
-    if isinstance(x, (int, float)):
-        close = (abs(y - x) < 1e-2) or (abs(y - x) / abs(x) < 0.01)
-        if not close:
-            print(f"Not close: {x} vs {y} in {parent_name}")
-        return close
-
-    # Handle boolean values
-    elif isinstance(x, bool):
-        equal = x == y
-        if not equal:
-            print(f"Not equal: {x} vs {y} in {parent_name}")
-        return equal
-
-    # Handle string values
-    elif isinstance(x, str):
-        equal = x == y
-        if not equal:
-            print(f"Not equal: {x} vs {y} in {parent_name}")
-        return equal
-
-    # Handle dictionaries
-    elif isinstance(x, dict):
-        # Check for keys in both dictionaries
-        all_keys = set(x.keys()) | set(y.keys())
-        for k in all_keys:
-            if k not in x:
-                print(f"Key {k} missing in first dict in {parent_name}")
-                return False
-            if k not in y:
-                print(f"Key {k} missing in second dict in {parent_name}")
-                return False
-            if not is_similar(x[k], y[k], parent_name=parent_name + "/" + k):
-                return False
-        return True
-
-    # Handle lists
-    elif isinstance(x, list):
-        if len(x) != len(y):
-            print(f"Different lengths: {len(x)} vs {len(y)} in {parent_name}")
-            return False
-        return all(
-            is_similar(x[i], y[i], parent_name=parent_name + f"[{i}]")
-            for i in range(len(x))
-        )
-
-    # Handle other types
-    else:
-        equal = x == y
-        if not equal:
-            print(f"Not equal: {x} vs {y} in {parent_name}")
-        return equal
-
-
 class SimulationAPIv2:
     project: str
     location: str