@@ -21,6 +21,32 @@ def empty_report():
2121 return init_report ()
2222
2323
@pytest.fixture
def valid_customers_df():
    """Provide a two-row customers frame whose four columns use ``string`` dtype."""
    raw_columns = {
        "customer_id": ["customer1", "customer2"],
        "customer_zip_code_prefix": ["zip1", "zip2"],
        "customer_city": ["city1", "city2"],
        "customer_state": ["state1", "state2"],
    }
    # Every customer column is textual, so one dtype is applied uniformly.
    string_columns = {
        column: pd.Series(values, dtype="string")
        for column, values in raw_columns.items()
    }
    return pd.DataFrame(string_columns)
34+
35+
@pytest.fixture
def valid_products_df():
    """Provide a two-row products frame: two ``string`` id/category columns
    followed by four ``float64`` measurement columns."""
    text_columns = {
        "product_id": ["prod1", "prod2"],
        "product_category_name": ["categ1", "categ2"],
    }
    measurement_columns = {
        "product_weight_g": [491, 500],
        "product_length_cm": [19.0, 20.0],
        "product_height_cm": [12.0, 13.0],
        "product_width_cm": [16.0, 15.0],
    }

    # Text columns are inserted first so the resulting column order is
    # id, category, weight, length, height, width.
    columns = {
        name: pd.Series(values, dtype="string")
        for name, values in text_columns.items()
    }
    columns.update(
        {
            name: pd.Series(values, dtype="float64")
            for name, values in measurement_columns.items()
        }
    )
    return pd.DataFrame(columns)
48+
49+
2450@pytest .fixture
2551def valid_assembled_df ():
2652 return pd .DataFrame (
@@ -30,6 +56,7 @@ def valid_assembled_df():
3056 dtype = "string" ,
3157 ),
3258 "seller_id" : pd .Series (["seller1" , "seller2" ], dtype = "string" ),
59+ "customer_id" : pd .Series (["customer1" , "customer2" ], dtype = "string" ),
3360 "order_revenue" : pd .Series ([12.34 , 56.78 ], dtype = "float64" ),
3461 "product_id" : pd .Series (["prod1" , "prod2" ], dtype = "string" ),
3562 "order_status" : pd .Series (["delivered" , "cancelled" ], dtype = "string" ),
@@ -139,9 +166,11 @@ def test_log_info_appends_only_to_info(empty_report):
139166# =============================================================================
140167
141168
142- def test_seller_semantic_model_grain_preserved_success (valid_assembled_df ):
169+ def test_seller_semantic_model_grain_preserved_success (tmp_path , valid_assembled_df ):
170+
171+ run_context = RunContext .create (base_path = tmp_path )
143172
144- seller_semantic = build_seller_semantic (valid_assembled_df )
173+ seller_semantic = build_seller_semantic (valid_assembled_df , run_context )
145174 expected = (
146175 valid_assembled_df [["seller_id" , "order_year_week" ]].drop_duplicates ().shape [0 ]
147176 )
@@ -156,21 +185,28 @@ def test_seller_semantic_model_grain_preserved_success(valid_assembled_df):
156185 )
157186
158187
def test_seller_semantic_fails_on_multiple_run_ids(tmp_path, valid_assembled_df):
    """Expect ``build_seller_semantic`` to raise ``RuntimeError`` once one row
    of the assembled frame has its ``run_id`` overwritten with another value."""
    # Work on a copy so the shared fixture frame itself is left untouched.
    mixed_runs_df = valid_assembled_df.copy()
    mixed_runs_df.loc[1, "run_id"] = "another_run"

    context = RunContext.create(base_path=tmp_path)

    with pytest.raises(RuntimeError):
        build_seller_semantic(mixed_runs_df, context)
166197
167198
168199# =============================================================================
169200# BUILD BI SEMANTIC
170201# =============================================================================
171202
172203
173- def test_build_semantic_layer_success (tmp_path , valid_assembled_df ):
204+ def test_build_semantic_layer_success (
205+ tmp_path ,
206+ valid_assembled_df ,
207+ valid_customers_df ,
208+ valid_products_df ,
209+ ):
174210
175211 run_context = RunContext .create (base_path = tmp_path , run_id = "dummy_run_id" )
176212 run_context .initialize_directories ()
@@ -179,23 +215,27 @@ def test_build_semantic_layer_success(tmp_path, valid_assembled_df):
179215 run_context .assembled_path / "assembled_events_2023_01.parquet"
180216 )
181217
182- report = build_semantic_layer (run_context )
218+ valid_customers_df .to_parquet (
219+ run_context .contracted_path / "df_customers_contracted.parquet"
220+ )
183221
184- for module in SEMANTIC_MODULES :
222+ valid_products_df .to_parquet (
223+ run_context .contracted_path / "df_products_contracted.parquet"
224+ )
185225
186- output_path_seller = (
187- run_context .semantic_path
188- / module
189- / "seller_week_performance_fact_dumm_y_.parquet"
190- )
226+ report = build_semantic_layer (run_context )
191227
192- output_path_dim = (
193- run_context .semantic_path / module / "seller_dim_dumm_y_.parquet"
194- )
228+ for module_name , module in SEMANTIC_MODULES .items ():
229+ for table_name in module ["tables" ]:
195230
196- assert report ["status" ] == "success"
197- assert output_path_seller .exists ()
198- assert output_path_dim .exists ()
231+ outputs_path = (
232+ run_context .semantic_path
233+ / module_name
234+ / f"{ table_name } _dumm_y_.parquet"
235+ )
236+
237+ assert report ["status" ] == "success"
238+ assert outputs_path .exists ()
199239
200240
201241def test_build_semantic_layer_fails_on_multiple_ids (tmp_path , valid_assembled_df ):
@@ -242,6 +282,25 @@ def test_build_semantic_layer_fails_on_missing_columns(tmp_path, valid_assembled
242282 assert any ("approval_lag_days" in error for error in module_error )
243283
244284
def test_build_semantic_layer_fails_on_missing_or_empty_df(tmp_path):
    """Expect ``build_semantic_layer`` to report a failed ``load_tables`` step
    when the only assembled parquet file holds an empty DataFrame."""
    run_context = RunContext.create(base_path=tmp_path, run_id="dummy_run_id")
    run_context.initialize_directories()

    # Persist a frame with no rows and no columns as the assembled input.
    assembled_file = run_context.assembled_path / "assembled_events_2023_01.parquet"
    pd.DataFrame().to_parquet(assembled_file)

    report = build_semantic_layer(run_context)

    assert report["status"] == "failed"
    assert report["failed_step"] == "load_tables"

    load_errors = report["steps"]["load_tables"]["errors"]
    assert any("missing or empty" in message for message in load_errors)
302+
303+
245304# =============================================================================
246305# UNIT TESTS END
247306# =============================================================================
0 commit comments