Skip to content

Commit 892dd1e

Browse files
feat: scope threading + population-scope-aware sampling
Thread `scope` (individual/household) through the full spec builder pipeline: selector → hydrators → binder → AttributeSpec.

Add `agent_focus` extraction in the sufficiency check, propagated through SpecMeta to the sampler. The sampler now classifies agent_focus into three modes:
- "all" (families): everyone is an agent; kids are promoted to full agents
- "couples" (retired couples): both partners are agents; kids are NPCs
- "primary_only" (surgeons, students): only the primary adult is an agent; the partner becomes NPC context attached to the agent

New sampler functions: _classify_agent_focus, _generate_npc_partner, _sample_dependent_as_agent. The household sampling loop branches on the focus mode for partner and dependent handling.
1 parent 94ecb0e commit 892dd1e

10 files changed

Lines changed: 247 additions & 37 deletions

File tree

extropy/cli/commands/spec.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,12 @@ def spec_command(
6464

6565
size = sufficiency_result.size
6666
geography = sufficiency_result.geography
67+
agent_focus = sufficiency_result.agent_focus
6768
geo_str = f", {geography}" if geography else ""
68-
console.print(f"[green]✓[/green] Context sufficient ({size} agents{geo_str})")
69+
focus_str = f", focus: {agent_focus}" if agent_focus else ""
70+
console.print(
71+
f"[green]✓[/green] Context sufficient ({size} agents{geo_str}{focus_str})"
72+
)
6973

7074
# Step 1: Attribute Selection
7175
console.print()
@@ -207,6 +211,7 @@ def do_hydration():
207211
attributes=bound_attrs,
208212
sampling_order=sampling_order,
209213
sources=sources,
214+
agent_focus=agent_focus,
210215
)
211216

212217
console.print("[green]✓[/green] Spec assembled")

extropy/core/models/population.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ class SpecMeta(BaseModel):
304304
description: str = Field(description="Original population description")
305305
size: int = Field(description="Number of agents to generate")
306306
geography: str | None = Field(default=None, description="Geographic scope")
307+
agent_focus: str | None = Field(
308+
default=None,
309+
description="Who the study agents represent. Determines agent vs NPC partitioning in household sampling.",
310+
)
307311
created_at: datetime = Field(default_factory=datetime.now)
308312
version: str = Field(default="1.0", description="Spec format version")
309313
persona_template: str | None = Field(
@@ -460,6 +464,7 @@ def merge(self, extension: "PopulationSpec") -> "PopulationSpec":
460464
description=f"{self.meta.description} + {extension.meta.description}",
461465
size=self.meta.size,
462466
geography=self.meta.geography,
467+
agent_focus=self.meta.agent_focus,
463468
created_at=datetime.now(),
464469
version=self.meta.version,
465470
persona_template=None,
@@ -536,6 +541,10 @@ class DiscoveredAttribute(BaseModel):
536541
default="independent",
537542
description="independent: sample directly; derived: zero-variance formula; conditional: probabilistic dependency",
538543
)
544+
scope: Literal["individual", "household"] = Field(
545+
default="individual",
546+
description="individual: varies per person; household: shared across household members",
547+
)
539548
depends_on: list[str] = Field(default_factory=list)
540549

541550

@@ -558,6 +567,10 @@ class HydratedAttribute(BaseModel):
558567
strategy: Literal["independent", "derived", "conditional"] = Field(
559568
default="independent", description="Sampling strategy determined in Step 1"
560569
)
570+
scope: Literal["individual", "household"] = Field(
571+
default="individual",
572+
description="individual: varies per person; household: shared across household members",
573+
)
561574
depends_on: list[str] = Field(default_factory=list)
562575
sampling: SamplingConfig
563576
grounding: GroundingInfo
@@ -570,4 +583,8 @@ class SufficiencyResult(BaseModel):
570583
sufficient: bool
571584
size: int = Field(default=1000, description="Extracted or default population size")
572585
geography: str | None = None
586+
agent_focus: str | None = Field(
587+
default=None,
588+
description="Who this study is about, e.g. 'surgeons', 'high school students', 'retired couples', 'families'",
589+
)
573590
clarifications_needed: list[str] = Field(default_factory=list)

extropy/population/sampler/core.py

Lines changed: 172 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import sqlite3
1515
from datetime import datetime
1616
from pathlib import Path
17-
from typing import Any
17+
from typing import Any, Literal
1818

1919
from ...core.models import (
2020
PopulationSpec,
@@ -47,6 +47,30 @@ class SamplingError(Exception):
4747
pass
4848

4949

50+
def _classify_agent_focus(
51+
agent_focus: str | None,
52+
) -> Literal["all", "couples", "primary_only"]:
53+
"""Determine household agent scope from agent_focus metadata.
54+
55+
Returns:
56+
"all" — everyone in household is an agent (families, communities)
57+
"couples" — both partners are agents, kids are NPCs (retired couples, married couples)
58+
"primary_only" — only the primary adult is an agent, partner + kids are NPCs (surgeons, students, subscribers)
59+
"""
60+
if not agent_focus:
61+
return "primary_only"
62+
63+
focus_lower = agent_focus.lower()
64+
65+
if any(kw in focus_lower for kw in ("famil", "household", "everyone")):
66+
return "all"
67+
68+
if any(kw in focus_lower for kw in ("couple", "pair", "partners", "spouses")):
69+
return "couples"
70+
71+
return "primary_only"
72+
73+
5074
def _has_household_attributes(spec: PopulationSpec) -> bool:
5175
"""Check if the spec has household-scoped attributes, indicating household mode."""
5276
return any(attr.scope == "household" for attr in spec.attributes)
@@ -177,6 +201,83 @@ def _sample_population_independent(
177201
return agents
178202

179203

204+
def _generate_npc_partner(
    primary: dict[str, Any],
    household_attrs: set[str],
    categorical_options: dict[str, list[str]],
    rng: random.Random,
) -> dict[str, Any]:
    """Build a lightweight partner record to attach to the primary agent.

    The result is a plain dict, not a fully sampled agent. It contains:

    * ``age`` — only when the primary has an age; produced by
      ``correlate_partner_attribute``.
    * ``gender`` — always set, drawn from ``rng``.
    * a fixed set of demographic fields — each one only when the primary has
      it and ``correlate_partner_attribute`` returns a non-None value.
    * every name in ``household_attrs`` that the primary has, copied verbatim.
    * ``last_name`` — copied when the primary has a truthy one.
    * ``relationship`` — always ``"partner"``.

    Args:
        primary: The already-sampled primary adult.
        household_attrs: Names of attributes to copy unchanged from ``primary``.
        categorical_options: Per-attribute option lists passed through to
            ``correlate_partner_attribute``; missing entries are passed as None.
        rng: Random source used for every draw.

    Returns:
        The NPC partner dict.
    """
    correlated_fields = (
        "race_ethnicity",
        "education_level",
        "religious_affiliation",
        "political_orientation",
    )
    npc: dict[str, Any] = {}

    if "age" in primary:
        npc["age"] = correlate_partner_attribute("age", primary["age"], rng)
    npc["gender"] = rng.choice(["male", "female"])

    for field_name in correlated_fields:
        if field_name not in primary:
            continue
        value = correlate_partner_attribute(
            field_name,
            primary[field_name],
            rng,
            categorical_options.get(field_name),
        )
        if value is not None:
            npc[field_name] = value

    # Household-level values are copied as-is from the primary adult.
    npc.update(
        {name: primary[name] for name in household_attrs if name in primary}
    )

    surname = primary.get("last_name")
    if surname:
        npc["last_name"] = surname

    npc["relationship"] = "partner"
    return npc
243+
244+
245+
def _sample_dependent_as_agent(
    spec: PopulationSpec,
    attr_map: dict[str, AttributeSpec],
    rng: random.Random,
    index: int,
    id_width: int,
    stats: SamplingStats,
    numeric_values: dict[str, list[float]],
    dependent: Any,
    parent: dict[str, Any],
    household_id: str,
) -> dict[str, Any]:
    """Promote a household dependent to a fully sampled agent.

    A complete agent is sampled first via ``_sample_single_agent``. After
    that, the dependent's ``age`` and ``gender`` replace the sampled values,
    the household membership fields are set, and every household-scoped
    attribute present on ``parent`` is copied onto the new agent.

    NOTE(review): the age/gender override happens after sampling, so any
    value that was sampled conditionally on the original age/gender is
    presumably left as-is — confirm this is intended.

    Args:
        spec: Population spec; its attributes determine which values are
            household-scoped.
        attr_map, rng, index, id_width, stats, numeric_values: Passed through
            unchanged to ``_sample_single_agent``.
        dependent: Object exposing ``age``, ``gender`` and ``relationship``.
        parent: Agent dict whose household-scoped values are copied.
        household_id: Identifier of the household the new agent belongs to.

    Returns:
        The agent dict for the promoted dependent.
    """
    promoted = _sample_single_agent(
        spec, attr_map, rng, index, id_width, stats, numeric_values
    )

    # The dependent's known facts win over whatever was sampled.
    promoted.update(
        {
            "age": dependent.age,
            "gender": dependent.gender,
            "household_id": household_id,
            "household_role": f"dependent_{dependent.relationship}",
            "relationship_to_primary": dependent.relationship,
        }
    )

    # Household-scoped values are shared with the parent.
    promoted.update(
        {
            attribute.name: parent[attribute.name]
            for attribute in spec.attributes
            if attribute.scope == "household" and attribute.name in parent
        }
    )

    return promoted
279+
280+
180281
def _sample_population_households(
181282
spec: PopulationSpec,
182283
attr_map: dict[str, AttributeSpec],
@@ -192,6 +293,8 @@ def _sample_population_households(
192293
Returns (agents, households) where households is a list of household
193294
metadata dicts for DB persistence.
194295
"""
296+
focus_mode = _classify_agent_focus(spec.meta.agent_focus)
297+
195298
num_households = estimate_household_count(target_n)
196299
hh_id_width = len(str(num_households - 1))
197300

@@ -216,7 +319,7 @@ def _sample_population_households(
216319

217320
household_id = f"household_{hh_idx:0{hh_id_width}d}"
218321

219-
# Sample Adult 1 (primary)
322+
# Sample Adult 1 (primary) — always an agent
220323
adult1 = _sample_single_agent(
221324
spec, attr_map, rng, agent_index, id_width, stats, numeric_values
222325
)
@@ -244,45 +347,81 @@ def _sample_population_households(
244347
adult1["household_role"] = "adult_primary"
245348

246349
adult_ids = [adult1["_id"]]
247-
248-
if has_partner and agent_index < target_n:
249-
# Sample Adult 2 with correlated demographics
250-
adult2 = _sample_partner_agent(
251-
spec,
252-
attr_map,
253-
rng,
254-
agent_index,
255-
id_width,
256-
stats,
257-
numeric_values,
258-
adult1,
259-
household_attrs,
260-
categorical_options,
261-
)
262-
adult2["household_id"] = household_id
263-
adult2["household_role"] = "adult_secondary"
264-
# Partners share a surname
265-
if adult1.get("last_name"):
266-
adult2["last_name"] = adult1["last_name"]
267-
adult2["partner_id"] = adult1["_id"]
268-
adult1["partner_id"] = adult2["_id"]
269-
adult_ids.append(adult2["_id"])
270-
agent_index += 1
350+
adult2_added = False
351+
352+
if has_partner:
353+
if focus_mode in ("couples", "all") and agent_index < target_n:
354+
# Partner is a full agent
355+
adult2 = _sample_partner_agent(
356+
spec,
357+
attr_map,
358+
rng,
359+
agent_index,
360+
id_width,
361+
stats,
362+
numeric_values,
363+
adult1,
364+
household_attrs,
365+
categorical_options,
366+
)
367+
adult2["household_id"] = household_id
368+
adult2["household_role"] = "adult_secondary"
369+
if adult1.get("last_name"):
370+
adult2["last_name"] = adult1["last_name"]
371+
adult2["partner_id"] = adult1["_id"]
372+
adult1["partner_id"] = adult2["_id"]
373+
adult_ids.append(adult2["_id"])
374+
agent_index += 1
375+
adult2_added = True
376+
else:
377+
# Partner is NPC context on the primary agent
378+
npc_partner = _generate_npc_partner(
379+
adult1, household_attrs, categorical_options, rng
380+
)
381+
adult1["partner_npc"] = npc_partner
382+
adult1["partner_id"] = None
271383
else:
272384
adult1["partner_id"] = None
273385

274-
# Generate NPC dependents
386+
# Dependents
275387
dependents = generate_dependents(
276388
htype, household_size, num_adults, adult1_age, rng
277389
)
278-
dep_dicts = [d.model_dump() for d in dependents]
279390

280-
# Attach dependents to all adults
281-
adult1["dependents"] = dep_dicts
391+
if has_kids and focus_mode == "all":
392+
# Kids become full agents
393+
dep_dicts = []
394+
for dep in dependents:
395+
if agent_index >= target_n:
396+
# Remaining dependents stay as NPC data
397+
dep_dicts.append(dep.model_dump())
398+
continue
399+
kid_agent = _sample_dependent_as_agent(
400+
spec,
401+
attr_map,
402+
rng,
403+
agent_index,
404+
id_width,
405+
stats,
406+
numeric_values,
407+
dep,
408+
adult1,
409+
household_id,
410+
)
411+
agents.append(kid_agent)
412+
adult_ids.append(kid_agent["_id"])
413+
agent_index += 1
414+
# Any overflow dependents attached as NPC data
415+
adult1["dependents"] = dep_dicts
416+
else:
417+
# Kids are NPCs
418+
dep_dicts = [d.model_dump() for d in dependents]
419+
adult1["dependents"] = dep_dicts
420+
282421
agents.append(adult1)
283422

284-
if has_partner and len(adult_ids) > 1:
285-
adult2["dependents"] = dep_dicts
423+
if adult2_added:
424+
adult2["dependents"] = adult1.get("dependents", [])
286425
agents.append(adult2)
287426

288427
# Build household record
@@ -296,7 +435,7 @@ def _sample_population_households(
296435
"id": household_id,
297436
"household_type": htype.value,
298437
"adult_ids": adult_ids,
299-
"dependent_data": dep_dicts,
438+
"dependent_data": [d.model_dump() for d in dependents],
300439
"shared_attributes": shared_attrs,
301440
}
302441
)

extropy/population/spec_builder/binder.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def bind_constraints(
137137
type=attr.type,
138138
category=attr.category,
139139
description=attr.description,
140+
scope=attr.scope,
140141
sampling=filtered_sampling,
141142
grounding=attr.grounding,
142143
constraints=attr.constraints,
@@ -189,6 +190,7 @@ def build_spec(
189190
attributes: list[AttributeSpec],
190191
sampling_order: list[str],
191192
sources: list[str],
193+
agent_focus: str | None = None,
192194
) -> PopulationSpec:
193195
"""
194196
Assemble the final PopulationSpec from all components.
@@ -200,6 +202,7 @@ def build_spec(
200202
attributes: List of AttributeSpec
201203
sampling_order: Order for sampling
202204
sources: List of source URLs from research
205+
agent_focus: Who the study agents represent (determines agent vs NPC)
203206
204207
Returns:
205208
Complete PopulationSpec ready for YAML export
@@ -208,6 +211,7 @@ def build_spec(
208211
description=description,
209212
size=size,
210213
geography=geography,
214+
agent_focus=agent_focus,
211215
created_at=datetime.now(),
212216
)
213217

extropy/population/spec_builder/hydrators/conditional.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ def hydrate_conditional_base(
222222
category=original.category,
223223
description=original.description,
224224
strategy="conditional",
225+
scope=original.scope,
225226
depends_on=original.depends_on,
226227
sampling=sampling,
227228
grounding=grounding,
@@ -433,6 +434,7 @@ def hydrate_conditional_modifiers(
433434
category=original.category,
434435
description=original.description,
435436
strategy=original.strategy,
437+
scope=original.scope,
436438
depends_on=original.depends_on,
437439
sampling=new_sampling,
438440
grounding=original.grounding,

extropy/population/spec_builder/hydrators/derived.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ def hydrate_derived(
150150
category=original.category,
151151
description=original.description,
152152
strategy="derived",
153+
scope=original.scope,
153154
depends_on=original.depends_on,
154155
sampling=sampling,
155156
grounding=grounding,

extropy/population/spec_builder/hydrators/independent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ def hydrate_independent(
186186
category=original.category,
187187
description=original.description,
188188
strategy="independent",
189+
scope=original.scope,
189190
depends_on=[],
190191
sampling=sampling,
191192
grounding=grounding,

0 commit comments

Comments
 (0)