Skip to content

Commit 08e4b95

Browse files
committed
test: update unit tests to match refactoring
1 parent 6c421eb commit 08e4b95

2 files changed

Lines changed: 3 additions & 55 deletions

File tree

tests/test_assembly_stage.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -250,15 +250,14 @@ def test_merge_data_aggregates_duplicates(
250250

251251

252252
def test_derived_fields_correctness(valid_derived_df):
253-
result = derive_fields(valid_derived_df, "20230101T120000")
253+
result = derive_fields(valid_derived_df)
254254

255255
if isinstance(result, pl.LazyFrame):
256256
result = result.collect()
257257

258258
assert result["lead_time_days"].to_list() == [3, 5]
259259
assert result["approval_lag_days"].to_list() == [1, 1]
260260
assert result["delivery_delay_days"].to_list() == [1, 1]
261-
assert result.select(pl.col("run_id").unique()).item() == "20230101T120000"
262261
assert "order_year_week" in result.columns
263262

264263

@@ -358,22 +357,14 @@ def test_assemble_data_fails_on_missing_column(
358357
def test_dimension_references_uniqueness():
359358
df = pl.DataFrame({"id": ["1", "1", "2"], "val": ["a", "a", "b"]})
360359

361-
result = dimension_references(df.lazy(), "test", ["id"], ["id", "val"])
360+
result = dimension_references(df.lazy(), ["id"], ["id", "val"])
362361
if isinstance(result, pl.LazyFrame):
363362
result = result.collect()
364363
assert result.height == 2
365364

366365
df_conflict = pl.DataFrame({"id": ["1", "1"], "val": ["a", "b"]})
367366

368-
result = dimension_references(df_conflict.lazy(), "test", ["id"], ["id", "val"])
367+
result = dimension_references(df_conflict.lazy(), ["id"], ["id", "val"])
369368
if isinstance(result, pl.LazyFrame):
370369
result = result.collect()
371370
assert result.height == 1
372-
373-
374-
def test_dimension_references_fails_if_cols_missing():
375-
df = pl.DataFrame({"id": ["1"]})
376-
from polars.exceptions import ColumnNotFoundError
377-
378-
with pytest.raises((KeyError, ColumnNotFoundError)):
379-
dimension_references(df.lazy(), "test", ["id"], ["id", "missing"])

tests/test_semantic_stage.py

Lines changed: 0 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -166,21 +166,6 @@ def test_seller_semantic_model_grain_preserved_success(tmp_path, valid_assembled
166166
assert dim_df.height == expected_dim_len
167167

168168

169-
def test_seller_semantic_fails_on_multiple_run_ids(tmp_path, valid_assembled_df):
170-
run_context = RunContext.create(base=tmp_path, run_id="20230101T120000")
171-
# Clone and modify run_id
172-
broken_df = valid_assembled_df.clone()
173-
broken_df = broken_df.with_columns(
174-
pl.when(pl.Series([False, True]))
175-
.then(pl.lit("another_run").cast(pl.Categorical))
176-
.otherwise(pl.col("run_id"))
177-
.alias("run_id")
178-
)
179-
180-
with pytest.raises(RuntimeError, match="Multiple run_ids detected"):
181-
build_seller_semantic(broken_df.lazy(), run_context)
182-
183-
184169
# =============================================================================
185170
# BUILD BI SEMANTIC
186171
# =============================================================================
@@ -220,34 +205,6 @@ def test_build_semantic_layer_success(
220205
assert outputs_path.exists()
221206

222207

223-
def test_build_semantic_layer_fails_on_multiple_ids(tmp_path, valid_assembled_df):
224-
run_id = "20230101T120000"
225-
run_context = RunContext.create(base=tmp_path, run_id=run_id)
226-
run_context.initialize_directories()
227-
228-
# Clone and modify run_id for Polars
229-
broken_assembled = valid_assembled_df.clone()
230-
broken_assembled = broken_assembled.with_columns(
231-
pl.when(pl.Series([False, True]))
232-
.then(pl.lit("another_run").cast(pl.Categorical))
233-
.otherwise(pl.col("run_id"))
234-
.alias("run_id")
235-
)
236-
237-
broken_assembled.write_parquet(
238-
run_context.assembled_path / "assembled_events_2023_01_01.parquet"
239-
)
240-
241-
report = build_semantic_layer(run_context)
242-
243-
assert report["status"] == "failed"
244-
assert (
245-
report["modules"]["seller_semantic"]["seller_weekly_fact"]["build_stage"]
246-
== False
247-
)
248-
assert any("Multiple run_ids detected" in error for error in report["errors"])
249-
250-
251208
def test_build_semantic_layer_fails_on_missing_columns(tmp_path, valid_assembled_df):
252209
run_id = "20230101T120000"
253210
run_context = RunContext.create(base=tmp_path, run_id=run_id)

0 commit comments

Comments
 (0)