Skip to content

Commit 1bf4e1a

Browse files
improve test coverage
1 parent f7292ad commit 1bf4e1a

2 files changed

Lines changed: 135 additions & 122 deletions

File tree

tests/summary/test_summary.py

Lines changed: 135 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
# Copyright (c) QuantCo 2025-2026
22
# SPDX-License-Identifier: BSD-3-Clause
33

4+
import itertools
5+
import json
46
from collections.abc import Callable
7+
from datetime import date, datetime
8+
from decimal import Decimal
59
from typing import Any
610

711
import polars as pl
812
import pytest
913

1014
from diffly import compare_frames
11-
from diffly.summary import _format_fraction_as_percentage
15+
from diffly.comparison import DataFrameComparison
16+
from diffly.summary import _format_fraction_as_percentage, _to_python
1217

1318

1419
@pytest.mark.parametrize("show_perfect_column_matches", [True, False])
@@ -124,3 +129,132 @@ def test_zero_top_k_column_changes_with_show_sample_primary_key() -> None:
124129
top_k_column_changes=0,
125130
show_sample_primary_key_per_change=True,
126131
)
132+
133+
134+
def _make_comparison() -> DataFrameComparison:
    """Build the small two-frame comparison shared by the summary tests.

    The data is designed so every parametrized flag affects the expected JSON
    output:

    - Both frames have the same columns, so the schemas are equal and slim
      mode suppresses the schemas section.
    - ``status`` matches perfectly for the joined rows, so
      ``show_perfect_column_matches`` matters.
    - ``value`` differs for ``id=2``, so there is always a non-perfect column.
    - ``id=4`` is left-only and ``id=5`` is right-only, so sample rows matter.
    """
    lhs = pl.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "status": ["a", "b", "c", "d"],
            "value": [10.0, 20.0, 30.0, 40.0],
        }
    )
    rhs = pl.DataFrame(
        {
            "id": [1, 2, 3, 5],
            "status": ["a", "b", "c", "e"],
            "value": [10.0, 25.0, 30.0, 50.0],
        }
    )
    return compare_frames(lhs, rhs, primary_key="id")
155+
156+
157+
@pytest.mark.parametrize(
    "show_perfect_column_matches, show_top_column_changes, slim, sample_rows, sample_pk",
    [
        # ``sample_pk`` is derived rather than free: it is only True when both
        # ``sample_rows`` and ``show_top_column_changes`` are True.
        (perfect, top, slim_flag, rows, rows and top)
        for perfect, top, slim_flag, rows in itertools.product(
            [True, False], repeat=4
        )
    ],
)
def test_summary_data_parametrized(
    show_perfect_column_matches: bool,
    show_top_column_changes: bool,
    slim: bool,
    sample_rows: bool,
    sample_pk: bool,
) -> None:
    """Check the JSON form of the summary for every combination of flags.

    The expected dictionary is assembled piece by piece from the flag values
    and compared against the parsed output of ``summary.to_json()``.
    """
    summary = _make_comparison().summary(
        show_perfect_column_matches=show_perfect_column_matches,
        top_k_column_changes=3 if show_top_column_changes else 0,
        sample_k_rows_only=3 if sample_rows else 0,
        show_sample_primary_key_per_change=sample_pk,
        slim=slim,
    )
    actual = json.loads(summary.to_json())

    # Schemas are equal (same columns, same dtypes), so slim mode drops them.
    if slim:
        expected_schemas: dict | None = None
    else:
        expected_schemas = {
            "left_only_names": [],
            "in_common": [
                ["id", "Int64", "Int64"],
                ["status", "String", "String"],
                ["value", "Float64", "Float64"],
            ],
            "right_only_names": [],
        }

    # The single change in ``value`` (20.0 -> 25.0 for id=2) is only listed
    # when top column changes are requested.
    if show_top_column_changes:
        n_value_changes = 1
        value_changes: list[dict[str, Any]] | None = [
            {
                "old": 20.0,
                "new": 25.0,
                "count": 1,
                "sample_pk": [2] if sample_pk else None,
            }
        ]
    else:
        n_value_changes = 0
        value_changes = None

    # ``value`` matches on 2 of the 3 joined rows; ``status`` matches on all
    # of them and is therefore only present when perfect matches are shown.
    value_entry = {
        "name": "value",
        "match_rate": pytest.approx(2 / 3),
        "n_total_changes": n_value_changes,
        "changes": value_changes,
    }
    status_entry = {
        "name": "status",
        "match_rate": 1.0,
        "n_total_changes": 0,
        "changes": None,
    }
    if show_perfect_column_matches:
        expected_columns = [status_entry, value_entry]
    else:
        expected_columns = [value_entry]

    expected_rows = {
        "n_left": 4,
        "n_right": 4,
        "n_left_only": 1,
        "n_joined_equal": 2,
        "n_joined_unequal": 1,
        "n_right_only": 1,
    }

    expected = {
        "equal": False,
        "left_name": "left",
        "right_name": "right",
        "primary_key": ["id"],
        "schemas": expected_schemas,
        "rows": expected_rows,
        "columns": expected_columns,
        "sample_rows_left_only": [[4]] if sample_rows else None,
        "sample_rows_right_only": [[5]] if sample_rows else None,
    }

    assert actual == expected
242+
243+
244+
@pytest.mark.parametrize(
    "input, expected",
    [
        # Containers and JSON-native scalars are expected back unchanged.
        ([1, 2, 3], [1, 2, 3]),
        ({"a": 1, "b": 2}, {"a": 1, "b": 2}),
        ("string", "string"),
        (123, 123),
        (12.34, 12.34),
        (True, True),
        (None, None),
        # Dates and datetimes are expected as ISO-formatted strings.
        (date(2024, 1, 1), "2024-01-01"),
        (datetime(2024, 1, 1, 12, 0, 0), "2024-01-01T12:00:00"),
        # A Decimal is expected to compare equal to the corresponding float.
        (Decimal("12.34"), 12.34),
    ],
)
def test__to_python(input: Any, expected: Any) -> None:
    """Check that ``_to_python`` maps each input to its expected value."""
    converted = _to_python(input)
    assert converted == expected

tests/test_summary_data.py

Lines changed: 0 additions & 121 deletions
This file was deleted.

0 commit comments

Comments
 (0)