Skip to content

Commit 2ae4c11

Browse files
chore: Add dedicated PrimaryKeyError (#17)
1 parent fb90a0e commit 2ae4c11

File tree

6 files changed

+26
-16
lines changed

6 files changed

+26
-16
lines changed

diffly/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
__version__ = "unknown"
1212

1313

14+
from ._exceptions import PrimaryKeyError
1415
from .comparison import compare_frames
1516

16-
__all__ = ["compare_frames"]
17+
__all__ = ["PrimaryKeyError", "compare_frames"]

diffly/_exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copyright (c) QuantCo 2025-2026
2+
# SPDX-License-Identifier: BSD-3-Clause
3+
4+
5+
class PrimaryKeyError(ValueError):
6+
"""Raised when a primary key is empty, not unique, or required but not provided."""

diffly/comparison.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from ._cache import cached_method
1616
from ._conditions import condition_equal_columns, condition_equal_rows
17+
from ._exceptions import PrimaryKeyError
1718
from ._utils import (
1819
ABS_TOL_DEFAULT,
1920
ABS_TOL_TEMPORAL_DEFAULT,
@@ -131,23 +132,25 @@ def _init_with_validation(
131132
)
132133
if primary_key is not None:
133134
if len(primary_key) == 0:
134-
raise ValueError("The primary key columns must not be an empty list.")
135+
raise PrimaryKeyError(
136+
"The primary key columns must not be an empty list."
137+
)
135138
if missing := (set(primary_key) - set(left_schema.names())):
136139
raise ValueError(
137140
f"The primary key columns must be present in the left data frame, "
138-
f"but the following are missing: {', '.join(missing)}."
141+
f"but the following are missing: {', '.join(sorted(missing))}."
139142
)
140143
if missing := (set(primary_key) - set(right_schema.names())):
141144
raise ValueError(
142145
f"The primary key columns must be present in the right data frame, "
143-
f"but the following are missing: {', '.join(missing)}."
146+
f"but the following are missing: {', '.join(sorted(missing))}."
144147
)
145148
if not is_primary_key(left, primary_key):
146-
raise ValueError(
149+
raise PrimaryKeyError(
147150
"The columns are not a primary key for the left data frame."
148151
)
149152
if not is_primary_key(right, primary_key):
150-
raise ValueError(
153+
raise PrimaryKeyError(
151154
"The columns are not a primary key for the right data frame."
152155
)
153156

@@ -693,7 +696,7 @@ def summary(
693696

694697
def _check_primary_key(self) -> list[str]:
695698
if self.primary_key is None:
696-
raise ValueError(
699+
raise PrimaryKeyError(
697700
"`primary_key` must be provided to join `left` and `right`."
698701
)
699702
return self.primary_key

tests/test_dataframe_comparison.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
import polars as pl
77
import pytest
88

9-
from diffly import compare_frames
9+
from diffly import PrimaryKeyError, compare_frames
1010

1111

12-
@pytest.mark.parametrize("primary_key", ["name", ["name"], ("name")])
12+
@pytest.mark.parametrize("primary_key", ["name", ["name"], ("name",)])
1313
def test_primary_key_sequence_types(primary_key: str | Sequence[str]) -> None:
1414
left = pl.DataFrame({"name": ["a", "b"], "value": [1, 2]})
1515
right = pl.DataFrame({"name": ["a", "b"], "other": [3, 4]})
@@ -20,7 +20,7 @@ def test_primary_key_sequence_types(primary_key: str | Sequence[str]) -> None:
2020
def test_empty_primary_key() -> None:
2121
left = pl.DataFrame({"name": ["a", "b"], "value": [1, 2]})
2222
right = pl.DataFrame({"name": ["a", "b"], "other": [3, 4]})
23-
with pytest.raises(ValueError, match="empty"):
23+
with pytest.raises(PrimaryKeyError, match="empty"):
2424
compare_frames(left, right, primary_key=[])
2525

2626

@@ -38,9 +38,9 @@ def test_missing_primary_key() -> None:
3838
def test_pk_violation() -> None:
3939
df_valid = pl.DataFrame({"id": ["a", "b"], "value": [1, 2]})
4040
df_duplicates = pl.DataFrame({"id": ["a", "a"], "value": [1, 2]})
41-
with pytest.raises(ValueError, match="primary key.*left"):
41+
with pytest.raises(PrimaryKeyError, match="primary key.*left"):
4242
compare_frames(df_duplicates, df_valid, primary_key=["id"])
43-
with pytest.raises(ValueError, match="primary key.*right"):
43+
with pytest.raises(PrimaryKeyError, match="primary key.*right"):
4444
compare_frames(df_valid, df_duplicates, primary_key=["id"])
4545

4646

tests/test_fraction_same.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
UNSIGNED_INTEGER_DTYPES,
1515
)
1616

17-
from diffly import compare_frames
17+
from diffly import PrimaryKeyError, compare_frames
1818

1919
from .utils import FRAME_TYPES, TYPING_FRAME_TYPES
2020

@@ -23,7 +23,7 @@ def test_missing_primary_key_fraction_same() -> None:
2323
left = pl.DataFrame({"id": ["a", "b", "c"], "value": [1, 2, 3]})
2424
right = pl.DataFrame({"id": ["a", "b"], "value": [1, 2]})
2525
comparison = compare_frames(left, right)
26-
with pytest.raises(ValueError):
26+
with pytest.raises(PrimaryKeyError):
2727
_ = comparison.fraction_same("value")
2828

2929

tests/test_joined.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pytest
66
from polars.testing import assert_frame_equal
77

8-
from diffly import compare_frames
8+
from diffly import PrimaryKeyError, compare_frames
99

1010

1111
def test_joined() -> None:
@@ -31,7 +31,7 @@ def test_joined_missing_primary_key() -> None:
3131
left = pl.DataFrame({"id": ["a", "b"], "value": [1, 2]})
3232
right = pl.DataFrame({"id": ["a"], "value": [1]})
3333
comparison = compare_frames(left, right)
34-
with pytest.raises(ValueError):
34+
with pytest.raises(PrimaryKeyError):
3535
_ = comparison.joined()
3636

3737

0 commit comments

Comments
 (0)