Skip to content

Commit edfdf80

Browse files
Address Copilot review: Fix multi-table column lineage fallback and correct pytest ordering
1 parent 715b5d2 commit edfdf80

2 files changed

Lines changed: 50 additions & 4 deletions

File tree

ingestion/src/metadata/ingestion/source/dashboard/quicksight/metadata.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,9 +355,12 @@ def _build_column_lineage_from_parser(
355355
)
356356
logger.debug(traceback.format_exc())
357357

358-
if not column_lineage:
359-
# Fallback: parser returned no column lineage or all pairs
360-
# were filtered out. Use name-based matching as before.
358+
# Only fall back to name-based matching when the parser found
359+
# NO column lineage globally (parse failure, query too complex, or no aliases).
360+
# If the parser DID produce lineage but none matched this specific
361+
# from_entity (multi-table query), return an empty list rather than
362+
# manufacturing incorrect cross-table lineage.
363+
if not column_lineage and not lineage_parser.column_lineage:
361364
columns = [col.name.root for col in data_model_entity.columns]
362365
return self._get_column_lineage(
363366
from_entity, data_model_entity, columns

ingestion/tests/unit/topology/dashboard/test_quicksight.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,49 @@ def test_build_column_lineage_from_parser_multi_table_filters_correctly(self):
497497
assert result[0].fromColumns == [src_fqn]
498498
assert result[0].toColumn == alias_fqn
499499

500+
@pytest.mark.order(11)
501+
def test_build_column_lineage_no_fallback_when_parser_has_global_lineage(self):
502+
"""
503+
Regression test for the multi-table fallback bug (Issue #26670).
504+
505+
When lineage_parser.column_lineage is non-empty (parser succeeded)
506+
but none of the pairs match from_entity (because they belong to a
507+
different upstream table in a multi-table JOIN), the method must
508+
return an empty list and must NOT call _get_column_lineage (the
509+
name-based fallback). Calling the fallback here would manufacture
510+
incorrect cross-table column lineage.
511+
"""
512+
# Parser found lineage for a DIFFERENT table, not our from_entity
513+
other_src_col = MagicMock()
514+
other_src_col.raw_name = "user_id"
515+
other_src_col._parent = MagicMock()
516+
other_src_col._parent.__str__ = MagicMock(return_value="users_table")
517+
518+
other_tgt_col = MagicMock()
519+
other_tgt_col.raw_name = "uid"
520+
521+
mock_parser = MagicMock()
522+
# Parser globally found lineage — but only for 'users_table'
523+
mock_parser.column_lineage = [(other_src_col, other_tgt_col)]
524+
525+
mock_from_entity = MagicMock()
526+
# Our from_entity is 'orders_table' — no parser pairs match it
527+
mock_from_entity.name.root = "orders_table"
528+
mock_data_model = MagicMock()
529+
530+
with patch.object(
531+
self.quicksight,
532+
"_get_column_lineage",
533+
) as mock_fallback:
534+
result = self.quicksight._build_column_lineage_from_parser(
535+
mock_parser, mock_from_entity, mock_data_model
536+
)
537+
538+
# Must NOT have called the name-based fallback
539+
mock_fallback.assert_not_called()
540+
# Must return an empty list — no manufactured lineage
541+
assert result == []
542+
500543
@pytest.mark.order(12)
501544
def test_build_column_lineage_from_parser_iterable_parent(self):
502545
"""
@@ -550,7 +593,7 @@ def test_build_column_lineage_from_parser_iterable_parent(self):
550593
assert result[0].fromColumns == [src_fqn]
551594
assert result[0].toColumn == alias_fqn
552595

553-
@pytest.mark.order(11)
596+
@pytest.mark.order(13)
554597
def test_build_column_lineage_from_parser_falls_back_when_empty(self):
555598
"""
556599
When lineage_parser.column_lineage is empty (parser failed or

0 commit comments

Comments
 (0)