Skip to content

Commit e5f2484

Browse files
fix: Column name mapping in missing left/right (#12)
1 parent 1cb5220 commit e5f2484

5 files changed

Lines changed: 34 additions & 8 deletions

File tree

sqlcompyre/analysis/schema_comparison.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def table_names(self) -> Names:
7777
return Names(
7878
left=set(self.left_tables.keys()),
7979
right=set(self.right_tables.keys()),
80+
name_mapping=None,
8081
ignore_casing=self.ignore_casing,
8182
)
8283

sqlcompyre/analysis/table_comparison.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ def column_names(self) -> Names:
156156
return Names(
157157
left={col.name for col in self.left_table.columns},
158158
right={col.name for col in self.right_table.columns},
159+
name_mapping=self.column_name_mapping,
159160
ignore_casing=self.ignore_casing,
160161
)
161162

sqlcompyre/results/names.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,18 @@
77
class Names:
88
"""Investigate the names of database objects."""
99

10-
def __init__(self, left: set[str], right: set[str], ignore_casing: bool):
10+
def __init__(
11+
self,
12+
left: set[str],
13+
right: set[str],
14+
name_mapping: dict[str, str] | None,
15+
ignore_casing: bool,
16+
):
1117
"""
1218
Args:
1319
left: Names from the "left" database object.
1420
right: Names from the "right" database object.
21+
name_mapping: Mapping from the "left" to the "right" database object.
1522
ignore_casing: Whether to ignore casing for name equality.
1623
"""
1724
if ignore_casing:
@@ -20,6 +27,10 @@ def __init__(self, left: set[str], right: set[str], ignore_casing: bool):
2027
else:
2128
self._set_left = left
2229
self._set_right = right
30+
self._name_mapping = name_mapping
31+
self._inverse_name_mapping = (
32+
{v: k for k, v in name_mapping.items()} if name_mapping else {}
33+
)
2334

2435
@cached_property
2536
def left(self) -> list[str]:
@@ -39,12 +50,22 @@ def in_common(self) -> list[str]:
3950
@cached_property
4051
def missing_left(self) -> list[str]:
4152
"""Ordered list of names provided only by the "right" database object."""
42-
return sorted(self._set_right - self._set_left)
53+
if self._name_mapping:
54+
right_renamed = {
55+
self._inverse_name_mapping.get(k, k) for k in self._set_right
56+
}
57+
return sorted(right_renamed - self._set_left)
58+
else:
59+
return sorted(self._set_right - self._set_left)
4360

4461
@cached_property
4562
def missing_right(self) -> list[str]:
4663
"""Ordered list of names provided only by the "left" database object."""
47-
return sorted(self._set_left - self._set_right)
64+
if self._name_mapping:
65+
left_renamed = {self._name_mapping.get(k, k) for k in self._set_left}
66+
return sorted(left_renamed - self._set_right)
67+
else:
68+
return sorted(self._set_left - self._set_right)
4869

4970
@cached_property
5071
def equal(self) -> bool:

tests/analysis/table_comparison/test_column_matching.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ def test_partly_renaming(
8989
column_name_mapping={"age": "age_v2", "gpa": "gpa_v2"},
9090
)
9191
assert len(comparison.column_names.in_common) == 2
92-
assert len(comparison.column_names.missing_left) == 2
93-
assert len(comparison.column_names.missing_right) == 2
92+
assert len(comparison.column_names.missing_left) == 0
93+
assert len(comparison.column_names.missing_right) == 0
9494
assert comparison.row_counts.diff == 1
9595
# Ensure that all columns are matched, one is primary key, 3 per table left
9696
assert pd.read_sql(comparison.row_matches.joined_equal, con=engine).shape[1] == 7
@@ -102,8 +102,8 @@ def test_partly_renaming(
102102
column_name_mapping={"age_v2": "age", "gpa_v2": "gpa"},
103103
)
104104
assert len(comparison.column_names.in_common) == 2
105-
assert len(comparison.column_names.missing_left) == 2
106-
assert len(comparison.column_names.missing_right) == 2
105+
assert len(comparison.column_names.missing_left) == 0
106+
assert len(comparison.column_names.missing_right) == 0
107107
assert comparison.row_counts.diff == 1
108108
# Ensure that all columns are matched, one is primary key, 3 per table left
109109
assert pd.read_sql(comparison.row_matches.joined_equal, con=engine).shape[1] == 7

tests/report/formatters/conftest.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@ def metadata_description() -> Metadata:
2020
@pytest.fixture()
2121
def names() -> Names:
2222
return Names(
23-
left={"hello", "world"}, right={"hello", "hi", "there"}, ignore_casing=False
23+
left={"hello", "world"},
24+
right={"hello", "hi", "there"},
25+
name_mapping=None,
26+
ignore_casing=False,
2427
)
2528

2629

0 commit comments

Comments
 (0)