88import pandas as pd
99from typing import Dict , List , Tuple , Literal
1010from data_pipeline .shared .run_context import RunContext
11- from data_pipeline .shared .table_configs import (
11+ from data_pipeline .shared .modeling_configs import (
1212 SELLER_DIM_ENFORCED_SCHEMA ,
1313 SELLER_DIM_ENFORCED_DTYPES ,
1414 SELLER_FACT_ENFORCED_SCHEMA ,
@@ -45,23 +45,22 @@ def seller_weekly_semantic(
4545 df : pd .DataFrame ,
4646) -> Tuple [pd .DataFrame , pd .DataFrame ]:
4747 """
48- Seller weekly semantic builder.
49-
50- Transforms the assembled event table into seller-level weekly
51- performance fact and supporting seller dimension.
52-
53- Transform behavior:
54-
55- - Validates single-run lineage via `run_id`
56- - Derives weekly alignment fields and status flags
57- - Aggregates event data to seller-week grain
58- - Builds seller dimension from first observed activity
48+ Build seller weekly semantic layer from assembled events.
5949
6050 Fact grain:
61- - One row per (seller_id, order_year_week)
51+ - 1 row per (seller_id, order_year_week)
6252
6353 Dimension grain:
64- - One row per seller_id
54+ - 1 row per seller_id
55+
56+ Behavior:
57+ - Enforce single run_id lineage
58+ - Derive ISO week alignment
59+ - Aggregate event metrics to seller-week
60+
61+ Returns:
62+ - Aggregated fact dataframe
63+ - Seller dimension dataframe
6564 """
6665
6766 read_assembled = df .copy ()
@@ -109,40 +108,26 @@ def freeze_seller_semantic(
109108 table_type : Literal ["fact" , "dim" ],
110109) -> pd .DataFrame :
111110 """
112- Seller semantic contract enforcer .
111+ Enforce seller semantic contract.
113112
114- Routes the input table to the appropriate fact or dimension
115- contract freezer and enforces grain integrity before projection.
113+ For fact:
114+ - Validate required columns
115+ - Enforce (seller_id, order_year_week) uniqueness
116+ - Apply projection, dtype enforcement, deterministic sort
116117
117- Behavior:
118+ For dimension:
119+ - Validate required columns
120+ - Enforce seller_id uniqueness
121+ - Apply projection and dtype enforcement
118122
119- - Validates `table_type` selector
120- - Applies grain-level duplicate checks:
121- - fact: (seller_id, order_year_week)
122- - dim: seller_id
123- - Dispatches to the corresponding schema freezer
124- - Returns a BI-ready, schema-stable dataframe
123+ Raises:
124+ - ValueError on invalid table_type; RuntimeError on schema or grain violation
125125 """
126126
127127 if table_type not in {"fact" , "dim" }:
128128 raise ValueError
129129
130130 def freeze_seller_fact (df : pd .DataFrame ) -> pd .DataFrame :
131- """
132- Seller weekly fact contract enforcement.
133-
134- Projects the aggregated seller-week dataset into the approved
135- fact schema, enforces dtypes, and applies deterministic ordering.
136-
137- Enforcement actions:
138-
139- - Validates presence of all required fact columns
140- - Projects to the contract column order
141- - Casts fields to enforced dtypes
142- - Sorts by (seller_id, order_year_week)
143- - Resets index for clean downstream consumption
144-
145- """
146131
147132 fact_contract = df .copy ()
148133
@@ -161,20 +146,6 @@ def freeze_seller_fact(df: pd.DataFrame) -> pd.DataFrame:
161146 return fact_contract
162147
163148 def freeze_seller_dim (df : pd .DataFrame ) -> pd .DataFrame :
164- """
165- Seller dimension contract enforcement.
166-
167- Projects the seller dimension into the approved schema,
168- enforces dtypes, and applies deterministic ordering.
169-
170- Enforcement actions:
171-
172- - Validates presence of all required dimension columns
173- - Projects to the contract column order
174- - Casts fields to enforced dtypes
175- - Sorts by seller_id
176- - Resets index for clean downstream consumption
177- """
178149
179150 dim_contract = df .copy ()
180151
@@ -216,20 +187,24 @@ def build_semantic_layer(run_context: RunContext) -> Dict:
216187 """
217188 Semantic layer orchestrator.
218189
219- Builds seller performance semantic tables from the assembled
220- event layer and exports contract-compliant BI artifacts.
190+ Builds semantic modules from the assembled event layer and
191+ exports contract-compliant BI artifacts.
192+
193+ Execution:
194+ - Load assembled event dataset (order grain)
195+ - Fail fast if dataset is missing or empty
196+ - Execute registered semantic builders
197+ - Apply module-level freeze contracts
198+ - Export semantic artifacts into run-scoped semantic directory
199+ - Aggregate findings into report
221200
222- Chronological behavior:
201+ Guarantees:
202+ - Each semantic module enforces its declared grain
203+ - All exported tables are contract-compliant
204+ - No decision logic embedded
223205
224- - Initializes run-scoped reporting and logging helpers.
225- - Loads the assembled_events logical table.
226- - Fails fast if the assembled dataset is missing or empty.
227- - Executes semantic pipeline:
228- - seller_weekly_semantic (aggregation)
229- - freeze_seller_semantic (fact and dimension contracts)
230- - Generates run-partitioned output filenames.
231- - Exports semantic tables to the run-scoped semantic directory.
232- - Aggregates all findings into the returned report.
206+ Failure:
207+ - Any module failure halts semantic stage
233208 """
234209
235210 report = init_report ()
0 commit comments