Skip to content

Commit cb829ac

Browse files
benjello and claude committed
feat: wire _id_to_rownum identity mapping in SimulationBuilder
Populate `_id_to_rownum` with the identity mapping `[0, 1, ..., n-1]` for all static simulations built via `build_default_simulation` and `build_from_dict` / `build_from_entities`, preparing the ground for dynamic populations (LIAM2-inspired).

- Add `_BuildDefaultSimulation.add_id_to_rownum()` and chain it in `build_default_simulation()`.
- Set `population._id_to_rownum` in `finalize_variables_init()` after `count` and `ids` are known.
- Add tests: basic builder paths plus edge cases (empty, single, group TBS, dtype check, index round-trip).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 5117dfb commit cb829ac

3 files changed

Lines changed: 159 additions & 0 deletions

File tree

openfisca_core/simulations/_build_default_simulation.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class _BuildDefaultSimulation:
3030
... .add_count()
3131
... .add_ids()
3232
... .add_members_entity_id()
33+
... .add_id_to_rownum()
3334
... )
3435
3536
>>> builder.count
@@ -121,6 +122,16 @@ def add_ids(self) -> Self:
121122

122123
return self
123124

125+
def add_id_to_rownum(self) -> Self:
126+
"""Set identity id_to_rownum mapping on all populations.
127+
128+
For static simulations, each entity's permanent ID equals its row
129+
position, so id_to_rownum is the identity: id_to_rownum[i] = i.
130+
"""
131+
for population in self.populations.values():
132+
population._id_to_rownum = numpy.arange(self.count, dtype=numpy.intp)
133+
return self
134+
124135
def add_members_entity_id(self) -> Self:
125136
"""Add ???
126137

openfisca_core/simulations/simulation_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ def build_default_simulation(
327327
.add_count()
328328
.add_ids()
329329
.add_members_entity_id()
330+
.add_id_to_rownum()
330331
.simulation
331332
)
332333

@@ -659,6 +660,7 @@ def finalize_variables_init(self, population) -> None:
659660
if plural_key in self.entity_counts:
660661
population.count = self.get_count(plural_key)
661662
population.ids = self.get_ids(plural_key)
663+
population._id_to_rownum = numpy.arange(population.count, dtype=numpy.intp)
662664
if plural_key in self.memberships:
663665
population.members_entity_id = numpy.array(self.get_memberships(plural_key))
664666
population.members_role = numpy.array(self.get_roles(plural_key))
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import numpy as np
2+
3+
from openfisca_core import entities, periods, taxbenefitsystems
4+
from openfisca_core.entities.entity import Entity
5+
from openfisca_core.populations._core_population import CorePopulation
6+
from openfisca_core.simulations import SimulationBuilder
7+
8+
9+
def test_get_period_id_to_rownum_remapping():
    """Check the mapping returned for each snapshotted period.

    Two snapshots are taken with different ``_id_to_rownum`` arrays; the
    test asserts that ``get_period_id_to_rownum`` returns, for each period,
    the mapping that was set when that period was snapshotted.
    """
    first_period = periods.period("2010-01")
    second_period = periods.period("2010-02")

    population = CorePopulation(Entity("person", "people", "", ""))
    population.count = 3

    # First snapshot: identity mapping (0 -> 0, 1 -> 1, 2 -> 2).
    population._id_to_rownum = np.array([0, 1, 2], dtype=np.intp)
    population.snapshot_period(first_period)

    # Second snapshot: reordered mapping (0 -> 2, 1 -> 0, 2 -> 1).
    population._id_to_rownum = np.array([2, 0, 1], dtype=np.intp)
    population.snapshot_period(second_period)

    # Ids to translate into row numbers under each period's mapping.
    lookups = np.array([0, 0, 1, 2, 0], dtype=np.intp)

    mapping_at_first = population.get_period_id_to_rownum(first_period)
    assert mapping_at_first is not None
    # Under the identity mapping, the rows are the ids themselves.
    expected_first = np.array([0, 0, 1, 2, 0], dtype=np.intp)
    assert np.array_equal(mapping_at_first[lookups], expected_first)

    mapping_at_second = population.get_period_id_to_rownum(second_period)
    assert mapping_at_second is not None
    # Under the reordered mapping, the same ids land on different rows.
    expected_second = np.array([2, 2, 0, 1, 2], dtype=np.intp)
    assert np.array_equal(mapping_at_second[lookups], expected_second)
40+
41+
42+
def _make_tbs():
    """Build a TaxBenefitSystem whose only entity is a "person" entity."""
    return taxbenefitsystems.TaxBenefitSystem(
        [entities.Entity("person", "persons", "", "")]
    )
46+
47+
48+
def _make_group_tbs():
    """Build a TaxBenefitSystem with a person entity and a household group entity."""
    member_role = {"key": "member"}
    entity_list = [
        entities.SingleEntity("person", "persons", "", ""),
        entities.GroupEntity("household", "households", "", "", roles=[member_role]),
    ]
    return taxbenefitsystems.TaxBenefitSystem(entity_list)
55+
56+
57+
def test_build_default_simulation_sets_id_to_rownum():
    """build_default_simulation gives every population an identity mapping."""
    tax_benefit_system = _make_tbs()
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, count=3
    )
    for population in simulation.populations.values():
        mapping = population._id_to_rownum
        assert mapping is not None
        assert np.array_equal(mapping, np.arange(3, dtype=np.intp))
64+
65+
66+
def test_build_from_dict_sets_id_to_rownum():
    """build_from_dict gives the person population an identity mapping."""
    tax_benefit_system = _make_tbs()
    situation = {"persons": {"p0": {}, "p1": {}, "p2": {}}}
    simulation = SimulationBuilder().build_from_dict(tax_benefit_system, situation)
    mapping = simulation.populations["person"]._id_to_rownum
    assert mapping is not None
    assert np.array_equal(mapping, np.arange(3, dtype=np.intp))
82+
83+
84+
def test_build_default_simulation_empty():
    """With count=0 the mapping is an empty intp array rather than None."""
    tax_benefit_system = _make_tbs()
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, count=0
    )
    for population in simulation.populations.values():
        mapping = population._id_to_rownum
        assert mapping is not None
        assert mapping.shape == (0,)
        assert mapping.dtype == np.intp
92+
93+
94+
def test_build_default_simulation_single():
    """With count=1 the mapping is the one-element array [0]."""
    tax_benefit_system = _make_tbs()
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, count=1
    )
    for population in simulation.populations.values():
        expected = np.array([0], dtype=np.intp)
        assert np.array_equal(population._id_to_rownum, expected)
100+
101+
102+
def test_build_default_simulation_group_tbs_both_populations():
    """The person and household populations both get the mapping [0, 1]."""
    tax_benefit_system = _make_group_tbs()
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, count=2
    )
    for entity_key in ("person", "household"):
        mapping = simulation.populations[entity_key]._id_to_rownum
        assert np.array_equal(mapping, [0, 1])
108+
109+
110+
def test_build_from_dict_group_tbs_both_populations():
    """build_from_dict sizes each population's mapping to its own entity count."""
    tax_benefit_system = _make_group_tbs()
    persons = {"p0": {}, "p1": {}, "p2": {}}
    households = {
        "h0": {"member": ["p0", "p1"]},
        "h1": {"member": ["p2"]},
    }
    simulation = SimulationBuilder().build_from_dict(
        tax_benefit_system,
        {"persons": persons, "households": households},
    )

    # Three persons were declared above.
    person_mapping = simulation.populations["person"]._id_to_rownum
    assert np.array_equal(person_mapping, np.arange(3, dtype=np.intp))

    # Two households were declared above.
    household_mapping = simulation.populations["household"]._id_to_rownum
    assert np.array_equal(household_mapping, np.arange(2, dtype=np.intp))
129+
130+
131+
def test_id_to_rownum_dtype():
    """The mapping built by build_default_simulation has dtype numpy.intp."""
    tax_benefit_system = _make_tbs()
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, count=4
    )
    mapping = simulation.populations["person"]._id_to_rownum
    assert mapping.dtype == np.intp
137+
138+
139+
def test_id_to_rownum_usable_as_index():
    """Indexing the identity mapping with ids yields the same ids back."""
    size = 5
    tax_benefit_system = _make_tbs()
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, count=size
    )
    mapping = simulation.populations["person"]._id_to_rownum
    all_ids = np.arange(size, dtype=np.intp)
    assert np.array_equal(mapping[all_ids], all_ids)

0 commit comments

Comments
 (0)