Skip to content

Commit 6c470d0

Browse files
easel and claude committed
Generalize Nullable model to support arbitrary context keys
The Nullable model previously hardcoded three healthcare LOBs (MD, MP, ME). It now uses Pydantic's extra="allow" to accept any context keys, making tablespec usable across domains (regional, environment, etc.) while maintaining full backward compatibility with existing YAML files.

Model changes:
- Nullable uses ConfigDict(extra="allow") instead of fixed fields
- is_nullable_for_all_contexts() iterates dynamic fields via model_dump()
- JSON schema updated to use additionalProperties instead of fixed props

Consumer updates:
- Renamed lob/lobs variables to context/contexts across prompts, GX baseline, generators, validators
- Updated docstrings from "LOB-aware" to "context-aware"
- Excel converter keeps MD/MP/ME as default presentation columns

Tests:
- 4 new tests for custom contexts, mixed contexts, model_dump behavior
- All 1889 tests passing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8c19979 commit 6c470d0

13 files changed

Lines changed: 126 additions & 101 deletions

src/tablespec/excel_converter.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import contextlib
1212
import json
1313
import logging
14-
import re
1514
from pathlib import Path
1615
from typing import Any, ClassVar
1716

@@ -77,7 +76,10 @@ class ExcelConstants:
7776
# Table type options
7877
TABLE_TYPES: ClassVar[list[str]] = ["provided", "generated", "lookup"]
7978

80-
# LOB options
79+
# Default nullable context keys for Excel column headers.
80+
# These represent the default healthcare contexts (Medicaid/Marketplace/Medicare).
81+
# The Excel format uses a fixed set of columns, so these serve as the default
82+
# presentation layer even though the underlying Nullable model accepts arbitrary keys.
8183
LOBS: ClassVar[list[str]] = ["MD", "MP", "ME"]
8284

8385
# Domain types

src/tablespec/gx_baseline.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -249,17 +249,17 @@ def generate_baseline_column_expectations(
249249
# 3. Nullability (from UMF nullable field, not profiling)
250250
nullable = column.get("nullable", {})
251251
if nullable:
252-
# Check if required for any LOB
253-
required_lobs = [lob for lob, is_null in nullable.items() if not is_null]
254-
if required_lobs:
252+
# Check if required for any context
253+
required_contexts = [ctx for ctx, is_null in nullable.items() if not is_null]
254+
if required_contexts:
255255
expectations.append(
256256
{
257257
"type": "expect_column_values_to_not_be_null",
258258
"kwargs": {"column": column_name},
259259
"meta": {
260-
"description": f"Column {column_name} is required (nullable=false) for LOBs: {', '.join(required_lobs)}",
260+
"description": f"Column {column_name} is required (nullable=false) for contexts: {', '.join(required_contexts)}",
261261
"severity": "critical",
262-
"lob": required_lobs,
262+
"contexts": required_contexts,
263263
"generated_from": "baseline",
264264
},
265265
}

src/tablespec/models/umf.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -200,11 +200,19 @@ def normalize_filename_pattern(cls, data: Any) -> Any:
200200

201201

202202
class Nullable(BaseModel):
203-
"""Nullable configuration per Line of Business."""
203+
"""Nullable configuration per context (e.g., Line of Business).
204204
205-
MD: bool | None = Field(default=None, description="Nullable for Medicaid")
206-
MP: bool | None = Field(default=None, description="Nullable for Medicare Part D")
207-
ME: bool | None = Field(default=None, description="Nullable for Medicare")
205+
Accepts arbitrary context keys with boolean values. Common healthcare
206+
contexts include MD (Medicaid), MP (Medicare Part D), ME (Medicare),
207+
but any domain-specific keys are supported.
208+
209+
Examples:
210+
Nullable(MD=False, MP=False, ME=False) # Healthcare LOBs
211+
Nullable(US=False, EU=True) # Regional contexts
212+
Nullable(production=False, staging=True) # Environment contexts
213+
"""
214+
215+
model_config = ConfigDict(extra="allow")
208216

209217

210218
class JoinViaSpec(BaseModel):
@@ -565,9 +573,9 @@ def precision_recommended_for_decimal(cls, v, info) -> int | None:
565573
return v
566574

567575
def is_nullable_for_all_contexts(self) -> bool:
568-
"""Check if column is nullable across all LOB contexts.
576+
"""Check if column is nullable across all contexts.
569577
570-
Returns True if nullable for all contexts or if nullable is True.
578+
Returns True if nullable for all contexts or if nullable is not specified.
571579
Returns False if required (non-nullable) for any context.
572580
"""
573581
if self.nullable is None:
@@ -576,17 +584,14 @@ def is_nullable_for_all_contexts(self) -> bool:
576584
return self.nullable
577585
if isinstance(self.nullable, dict):
578586
return all(self.nullable.values())
579-
# If it's a Nullable model instance
580-
return all(
581-
[
582-
getattr(self.nullable, "MD", True),
583-
getattr(self.nullable, "MP", True),
584-
getattr(self.nullable, "ME", True),
585-
]
586-
)
587+
# Nullable model instance — iterate all set fields (extra fields included)
588+
fields = self.nullable.model_dump(exclude_none=True)
589+
if not fields:
590+
return True # No contexts defined = nullable by default
591+
return all(fields.values())
587592

588593
def is_required_for_any_context(self) -> bool:
589-
"""Check if column is required (non-nullable) for any LOB context.
594+
"""Check if column is required (non-nullable) for any context.
590595
591596
Returns True if required for at least one context.
592597
Returns False if nullable for all contexts.

src/tablespec/prompts/column_validation.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
from typing import Any
55

66
from tablespec.gx_baseline import BaselineExpectationGenerator
7-
8-
logger = logging.getLogger(__name__)
97
from tablespec.prompts.expectation_guide import (
108
format_quick_reference,
119
get_pending_decision_tree,
1210
)
1311

12+
logger = logging.getLogger(__name__)
13+
1414

1515
def should_generate_column_prompt(col: dict[str, Any]) -> bool:
1616
"""Check if column needs a dedicated validation prompt.
@@ -185,8 +185,8 @@ def generate_column_validation_prompt(
185185
exp.get("meta", {}).get("generated_from") == "domain_type" for exp in baseline_expectations
186186
)
187187

188-
# Required LOBs
189-
req_lobs = [lob for lob, is_null in sorted(nullable.items()) if not is_null]
188+
# Required contexts (e.g. LOBs like MD/ME/MP, or any configurable keys)
189+
req_contexts = [ctx for ctx, is_null in sorted(nullable.items()) if not is_null]
190190

191191
# Build domain type hint section
192192
domain_type_section = ""
@@ -274,8 +274,8 @@ def generate_column_validation_prompt(
274274
**{col_name}** ({data_type}): {col_desc}
275275
"""
276276

277-
if req_lobs:
278-
prompt += f"- **Required for LOBs**: {', '.join(req_lobs)}\n"
277+
if req_contexts:
278+
prompt += f"- **Required for contexts**: {', '.join(req_contexts)}\n"
279279
if format_spec:
280280
prompt += f"- **Format**: {format_spec}\n"
281281
if max_length:

src/tablespec/prompts/survivorship.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ def generate_survivorship_prompt_per_column(
675675
# Build nullable section
676676
nullable_section = ""
677677
if nullable:
678-
nullable_parts = [f"{lob}: {str(val).lower()}" for lob, val in sorted(nullable.items())]
678+
nullable_parts = [f"{ctx}: {str(val).lower()}" for ctx, val in sorted(nullable.items())]
679679
if nullable_parts:
680680
nullable_section = f"\n- **Nullable**: {', '.join(nullable_parts)}"
681681

src/tablespec/prompts/validation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -337,10 +337,10 @@ def generate_validation_prompt(umf_data: dict[str, Any]) -> str:
337337
# Build compact single-line format: **COL** (TYPE): Desc. [Req: LOBs.] [Fmt: X.] [Ex: a,b.] [Note: X.]
338338
line = f"**{col_name}** ({data_type}): {col_desc}."
339339

340-
# Add required LOBs (only non-nullable)
341-
req_lobs = [lob for lob, is_null in sorted(nullable.items()) if not is_null]
342-
if req_lobs:
343-
line += f" Req: {'/'.join(req_lobs)}."
340+
# Add required contexts (only non-nullable)
341+
req_contexts = [ctx for ctx, is_null in sorted(nullable.items()) if not is_null]
342+
if req_contexts:
343+
line += f" Req: {'/'.join(req_contexts)}."
344344

345345
# Add format
346346
if format_spec:

src/tablespec/prompts/validation_per_column.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,14 @@ def _generate_column_validation_prompt_focused(
7777
format_spec = column_data.get("format", "")
7878
domain_type = column_data.get("domain_type")
7979

80-
# Required LOBs
81-
req_lobs = [lob for lob, is_null in sorted(nullable.items()) if not is_null]
80+
# Required contexts (e.g. LOBs like MD/ME/MP, or any configurable keys)
81+
req_contexts = [ctx for ctx, is_null in sorted(nullable.items()) if not is_null]
8282

8383
# Build auto-generated validations list
8484
auto_validations = [
8585
"- Column existence check",
8686
f"- Data type validation ({data_type})",
87-
f"- Nullability constraints (required for: {', '.join(req_lobs) if req_lobs else 'none'})",
87+
f"- Nullability constraints (required for: {', '.join(req_contexts) if req_contexts else 'none'})",
8888
]
8989
if max_length:
9090
auto_validations.append(f"- Max length constraint ({max_length} characters)")
@@ -106,7 +106,7 @@ def _generate_column_validation_prompt_focused(
106106
- **Column**: {column_name}
107107
- **Type**: {data_type}
108108
- **Max Length**: {max_length or "N/A"}
109-
- **Required LOBs**: {", ".join(req_lobs) if req_lobs else "None"}
109+
- **Required Contexts**: {", ".join(req_contexts) if req_contexts else "None"}
110110
- **Description**: {col_desc}
111111
{f"- **Format**: {format_spec}" if format_spec else ""}
112112

src/tablespec/schemas/generators.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ class JSONSchema(TypedDict, total=False):
2323

2424

2525
def _resolve_nullable(nullable_value: Any) -> bool:
26-
"""Resolve nullable value from bool, dict (LOB-specific), or None.
26+
"""Resolve nullable value from bool, dict (context-specific), or None.
2727
28-
Handles both simple boolean nullable flags and LOB-specific nullable dicts
29-
(e.g., {"MD": True, "MP": False, "ME": True} for Medicaid/Medicare).
28+
Handles both simple boolean nullable flags and context-specific nullable dicts
29+
(e.g., {"MD": True, "MP": False, "ME": True} or any arbitrary context keys).
3030
3131
Returns True (nullable) by default when value is missing or unrecognized.
3232
"""

src/tablespec/schemas/umf.schema.json

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -741,50 +741,12 @@
741741
"type": "object"
742742
},
743743
"Nullable": {
744-
"description": "Nullable configuration per Line of Business.",
745-
"properties": {
746-
"MD": {
747-
"anyOf": [
748-
{
749-
"type": "boolean"
750-
},
751-
{
752-
"type": "null"
753-
}
754-
],
755-
"default": null,
756-
"description": "Nullable for Medicaid",
757-
"title": "Md"
758-
},
759-
"MP": {
760-
"anyOf": [
761-
{
762-
"type": "boolean"
763-
},
764-
{
765-
"type": "null"
766-
}
767-
],
768-
"default": null,
769-
"description": "Nullable for Medicare Part D",
770-
"title": "Mp"
771-
},
772-
"ME": {
773-
"anyOf": [
774-
{
775-
"type": "boolean"
776-
},
777-
{
778-
"type": "null"
779-
}
780-
],
781-
"default": null,
782-
"description": "Nullable for Medicare",
783-
"title": "Me"
784-
}
744+
"description": "Nullable configuration per context. Keys are context identifiers (e.g., MD, MP, ME for healthcare LOBs), values are booleans indicating whether the column is nullable in that context.",
745+
"type": "object",
746+
"additionalProperties": {
747+
"type": "boolean"
785748
},
786-
"title": "Nullable",
787-
"type": "object"
749+
"title": "Nullable"
788750
},
789751
"OutgoingRelationship": {
790752
"description": "Outgoing foreign key relationship to another table.",

src/tablespec/spark_factory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import os
1515
from pathlib import Path
1616
import sys
17-
from typing import TYPE_CHECKING, Any
17+
from typing import TYPE_CHECKING
1818
import warnings
1919

2020
# Disable tqdm progress bars and suppress warnings BEFORE importing PySpark

0 commit comments

Comments
 (0)