|
25 | 25 | from policyengine_uk_data.storage import STORAGE_FOLDER |
26 | 26 | from policyengine_uk.data import UKSingleYearDataset |
27 | 27 | from policyengine_uk import Microsimulation |
| 28 | +from policyengine_uk_data.datasets.frs import WEEKS_IN_YEAR |
28 | 29 |
|
29 | 30 | LCFS_TAB_FOLDER = STORAGE_FOLDER / "lcfs_2021_22" |
30 | 31 |
|
| 32 | +# Seed for the stochastic ICE-vehicle flag drawn from |
| 33 | +# `NTS_2024_ICE_VEHICLE_SHARE`. Kept at 42, the value previously passed to |
| 34 | +# np.random.seed; it now seeds a function-local np.random.default_rng, whose |
| 35 | +# draws differ from the legacy global stream, so outputs are not bit-identical. |
| 36 | +_HAS_FUEL_SEED = 42 |
| 37 | + |
31 | 38 | # EV/ICE vehicle mix from NTS 2024 |
32 | 39 | NTS_2024_ICE_VEHICLE_SHARE = 0.90 |
33 | 40 |
|
@@ -338,6 +345,13 @@ def _derive_energy_from_lcfs(household: pd.DataFrame) -> pd.DataFrame: |
338 | 345 | electricity[mask4] = p537[mask4] * mean_elec_share |
339 | 346 | gas[mask4] = p537[mask4] * (1 - mean_elec_share) |
340 | 347 |
|
| 348 | + # Clamp to non-negative; raw LCFS bill variables occasionally produce |
| 349 | + # small negatives (e.g. B490 > B489 inconsistency, or implausible |
| 350 | + # negative P537 entries). Consumption totals can't be negative by |
| 351 | + # definition and downstream NEED calibration preserves zero. |
| 352 | + electricity = np.maximum(electricity, 0.0) |
| 353 | + gas = np.maximum(gas, 0.0) |
| 354 | + |
341 | 355 | household = household.copy() |
342 | 356 | household["electricity_consumption"] = electricity |
343 | 357 | household["gas_consumption"] = gas |
@@ -406,9 +420,12 @@ def create_has_fuel_model(): |
406 | 420 |
|
407 | 421 | num_vehicles = was["vcarnr7"].fillna(0).clip(lower=0) |
408 | 422 | has_vehicle = num_vehicles > 0 |
409 | | - np.random.seed(42) |
| 423 | + # Use a local RNG so we don't mutate the global np.random state (which |
| 424 | + # would silently change any unrelated consumer of np.random that runs |
| 425 | + # after this function). |
| 426 | + rng = np.random.default_rng(_HAS_FUEL_SEED) |
410 | 427 | has_fuel = ( |
411 | | - has_vehicle & (np.random.random(len(was)) < NTS_2024_ICE_VEHICLE_SHARE) |
| 428 | + has_vehicle & (rng.random(len(was)) < NTS_2024_ICE_VEHICLE_SHARE) |
412 | 429 | ).astype(float) |
413 | 430 |
|
414 | 431 | was_df = pd.DataFrame( |
@@ -481,18 +498,21 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame) |
481 | 498 |
|
482 | 499 | household = household.rename(columns=CONSUMPTION_VARIABLE_RENAMES) |
483 | 500 |
|
484 | | - # Annualise weekly LCFS values (× 52) |
| 501 | + # Annualise weekly LCFS values. Use the same WEEKS_IN_YEAR constant |
| 502 | + # (365.25 / 7 ≈ 52.1786) as `datasets/frs.py` rather than a bare `* 52`, |
| 503 | + # which underestimates annual totals by ~0.34% and skews VAT / energy |
| 504 | + # imputation targets against FRS income. |
485 | 505 | annualise = list(CONSUMPTION_VARIABLE_RENAMES.values()) + [ |
486 | 506 | "hbai_household_net_income", |
487 | 507 | "household_gross_income", |
488 | 508 | "electricity_consumption", |
489 | 509 | "gas_consumption", |
490 | 510 | ] |
491 | 511 | for variable in annualise: |
492 | | - household[variable] = household[variable] * 52 |
| 512 | + household[variable] = household[variable] * WEEKS_IN_YEAR |
493 | 513 | for variable in PERSON_LCF_RENAMES.values(): |
494 | 514 | household[variable] = ( |
495 | | - person[variable].groupby(person.case).sum()[household.case] * 52 |
| 515 | + person[variable].groupby(person.case).sum()[household.case] * WEEKS_IN_YEAR |
496 | 516 | ) |
497 | 517 | household.household_weight *= 1_000 |
498 | 518 |
|
@@ -577,9 +597,10 @@ def impute_consumption(dataset: UKSingleYearDataset) -> UKSingleYearDataset: |
577 | 597 | sim = Microsimulation(dataset=dataset) |
578 | 598 | num_vehicles = sim.calculate("num_vehicles", map_to="household").values |
579 | 599 |
|
580 | | - np.random.seed(42) |
| 600 | + # Local RNG — see note at module level (_HAS_FUEL_SEED). |
| 601 | + rng = np.random.default_rng(_HAS_FUEL_SEED) |
581 | 602 | has_vehicle = num_vehicles > 0 |
582 | | - is_ice = np.random.random(len(num_vehicles)) < NTS_2024_ICE_VEHICLE_SHARE |
| 603 | + is_ice = rng.random(len(num_vehicles)) < NTS_2024_ICE_VEHICLE_SHARE |
583 | 604 | has_fuel_consumption = (has_vehicle & is_ice).astype(float) |
584 | 605 | dataset.household["has_fuel_consumption"] = has_fuel_consumption |
585 | 606 |
|
|
0 commit comments