|
25 | 25 | from policyengine_uk_data.storage import STORAGE_FOLDER |
26 | 26 | from policyengine_uk.data import UKSingleYearDataset |
27 | 27 | from policyengine_uk import Microsimulation |
| 28 | +from policyengine_uk_data.datasets.frs import WEEKS_IN_YEAR |
28 | 29 |
|
29 | 30 | LCFS_TAB_FOLDER = STORAGE_FOLDER / "lcfs_2021_22" |
30 | 31 |
|
| 32 | +# Seed for the stochastic ICE-vehicle flag drawn against |
| 33 | +# `NTS_2024_ICE_VEHICLE_SHARE`. The value 42 matches the previous |
| 34 | +# np.random.seed(42), but np.random.default_rng uses a different bit generator |
| 35 | +# than the legacy global RNG, so the drawn flags differ from earlier artefacts. |
| 36 | +_HAS_FUEL_SEED = 42 |
| 37 | + |
31 | 38 | # EV/ICE vehicle mix from NTS 2024 |
32 | 39 | NTS_2024_ICE_VEHICLE_SHARE = 0.90 |
33 | 40 |
|
@@ -406,9 +413,12 @@ def create_has_fuel_model(): |
406 | 413 |
|
407 | 414 | num_vehicles = was["vcarnr7"].fillna(0).clip(lower=0) |
408 | 415 | has_vehicle = num_vehicles > 0 |
409 | | - np.random.seed(42) |
| 416 | + # Use a local RNG so we don't mutate the global np.random state (which |
| 417 | + # would silently change any unrelated consumer of np.random that runs |
| 418 | + # after this function). |
| 419 | + rng = np.random.default_rng(_HAS_FUEL_SEED) |
410 | 420 | has_fuel = ( |
411 | | - has_vehicle & (np.random.random(len(was)) < NTS_2024_ICE_VEHICLE_SHARE) |
| 421 | + has_vehicle & (rng.random(len(was)) < NTS_2024_ICE_VEHICLE_SHARE) |
412 | 422 | ).astype(float) |
413 | 423 |
|
414 | 424 | was_df = pd.DataFrame( |
@@ -481,18 +491,21 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame) |
481 | 491 |
|
482 | 492 | household = household.rename(columns=CONSUMPTION_VARIABLE_RENAMES) |
483 | 493 |
|
484 | | - # Annualise weekly LCFS values (× 52) |
| 494 | + # Annualise weekly LCFS values. Use the same WEEKS_IN_YEAR constant |
| 495 | + # (365.25 / 7 ≈ 52.1786) as `datasets/frs.py` rather than a bare `* 52`, |
| 496 | + # which underestimates annual totals by ~0.34% and skews VAT / energy |
| 497 | + # imputation targets against FRS income. |
485 | 498 | annualise = list(CONSUMPTION_VARIABLE_RENAMES.values()) + [ |
486 | 499 | "hbai_household_net_income", |
487 | 500 | "household_gross_income", |
488 | 501 | "electricity_consumption", |
489 | 502 | "gas_consumption", |
490 | 503 | ] |
491 | 504 | for variable in annualise: |
492 | | - household[variable] = household[variable] * 52 |
| 505 | + household[variable] = household[variable] * WEEKS_IN_YEAR |
493 | 506 | for variable in PERSON_LCF_RENAMES.values(): |
494 | 507 | household[variable] = ( |
495 | | - person[variable].groupby(person.case).sum()[household.case] * 52 |
| 508 | + person[variable].groupby(person.case).sum()[household.case] * WEEKS_IN_YEAR |
496 | 509 | ) |
497 | 510 | household.household_weight *= 1_000 |
498 | 511 |
|
@@ -577,9 +590,10 @@ def impute_consumption(dataset: UKSingleYearDataset) -> UKSingleYearDataset: |
577 | 590 | sim = Microsimulation(dataset=dataset) |
578 | 591 | num_vehicles = sim.calculate("num_vehicles", map_to="household").values |
579 | 592 |
|
580 | | - np.random.seed(42) |
| 593 | + # Local RNG — see note at module level (_HAS_FUEL_SEED). |
| 594 | + rng = np.random.default_rng(_HAS_FUEL_SEED) |
581 | 595 | has_vehicle = num_vehicles > 0 |
582 | | - is_ice = np.random.random(len(num_vehicles)) < NTS_2024_ICE_VEHICLE_SHARE |
| 596 | + is_ice = rng.random(len(num_vehicles)) < NTS_2024_ICE_VEHICLE_SHARE |
583 | 597 | has_fuel_consumption = (has_vehicle & is_ice).astype(float) |
584 | 598 | dataset.household["has_fuel_consumption"] = has_fuel_consumption |
585 | 599 |
|
|
0 commit comments