@@ -364,6 +364,7 @@ def describe_data_set_side_effect(**kwargs):
364364 col_names_b = {col .name .root for col in dm_b .columns }
365365 assert col_names_b == {"email" , "created_at" }
366366
367+ @pytest .mark .order (9 )
367368 def test_chart_source_state_populated (self ):
368369 """Verify register_record_chart populates chart_source_state after yield_dashboard_chart."""
369370 dashboard_details = DashboardDetail (** {** MOCK_DASHBOARD_DETAILS , "Version" : mock_data ["Version" ]})
@@ -372,3 +373,192 @@ def test_chart_source_state_populated(self):
372373 assert len (self .quicksight .chart_source_state ) == len (mock_data ["Version" ]["Sheets" ])
373374 for fqn in self .quicksight .chart_source_state :
374375 assert "quicksight_source_test" in fqn
376+
@pytest.mark.order(10)
def test_build_column_lineage_from_parser_resolves_alias(self):
    """
    Aliased CustomSql columns are linked source -> alias, not by equal names.

    The parser reports the pair (``id``, ``relation_id``), as for
    ``SELECT id AS relation_id``. The test checks that
    ``_build_column_lineage_from_parser`` resolves the source FQN from the
    raw source name, resolves the data model FQN from the alias name, and
    returns exactly one lineage entry joining the two.
    Issue #26670.
    """
    source_fqn = "postgres.public.relation_table.id"
    target_fqn = "quicksight_service.dataset.relation_id"

    # Parser output: source column "id" surfaces as "relation_id".
    parsed_source = MagicMock()
    parsed_source.raw_name = "id"
    parsed_source._parent = None  # No parent: single-table query
    parsed_target = MagicMock()
    parsed_target.raw_name = "relation_id"

    lineage_parser = MagicMock()
    lineage_parser.column_lineage = [(parsed_source, parsed_target)]

    upstream_table = MagicMock()
    upstream_table.name.root = "relation_table"
    data_model = MagicMock()

    # Both FQN helpers are stubbed so only the pairing logic is exercised.
    table_fqn_patch = patch(
        "metadata.ingestion.source.dashboard.quicksight.metadata.get_column_fqn",
        return_value=source_fqn,
    )
    model_fqn_patch = patch.object(
        self.quicksight,
        "_get_data_model_column_fqn",
        return_value=target_fqn,
    )
    with table_fqn_patch as table_fqn_lookup, model_fqn_patch as model_fqn_lookup:
        lineage = self.quicksight._build_column_lineage_from_parser(
            lineage_parser, upstream_table, data_model
        )

    # The source side is looked up by the raw name, the target by the alias.
    table_fqn_lookup.assert_called_once_with(
        table_entity=upstream_table, column="id"
    )
    model_fqn_lookup.assert_called_once_with(
        data_model_entity=data_model, column="relation_id"
    )

    assert len(lineage) == 1
    only_entry = lineage[0]
    assert only_entry.fromColumns == [source_fqn]
    assert only_entry.toColumn == target_fqn
424+
@pytest.mark.order(11)
def test_build_column_lineage_from_parser_multi_table_filters_correctly(self):
    """
    Only parser pairs that belong to ``from_entity`` produce lineage.

    The CustomSql joins two tables that both expose an ``id`` column
    (``relation_table.id`` and ``other_table.id``). With ``from_entity``
    set to ``relation_table``, the test checks that
    ``_build_column_lineage_from_parser`` returns a single entry and that
    it is the ``relation_table`` pair.

    The data model FQN stub derives its result from the requested column,
    so ``toColumn`` reveals which parser pair was kept. With a constant
    return value, keeping the wrong pair would be indistinguishable from
    keeping the right one.
    Issue #26670.
    """
    # Column from the correct upstream table
    src_col_correct = MagicMock()
    src_col_correct.raw_name = "id"
    src_col_correct._parent = MagicMock()
    src_col_correct._parent.__str__ = MagicMock(return_value="relation_table")

    tgt_col_correct = MagicMock()
    tgt_col_correct.raw_name = "relation_id"

    # Column from a DIFFERENT table with the same name 'id'
    src_col_wrong = MagicMock()
    src_col_wrong.raw_name = "id"
    src_col_wrong._parent = MagicMock()
    src_col_wrong._parent.__str__ = MagicMock(return_value="other_table")

    tgt_col_wrong = MagicMock()
    tgt_col_wrong.raw_name = "other_relation_id"

    mock_parser = MagicMock()
    mock_parser.column_lineage = [
        (src_col_correct, tgt_col_correct),
        (src_col_wrong, tgt_col_wrong),
    ]

    src_fqn = "postgres.public.relation_table.id"
    alias_fqn = "quicksight_service.dataset.relation_id"

    mock_from_entity = MagicMock()
    mock_from_entity.name.root = "relation_table"
    mock_data_model = MagicMock()

    def data_model_fqn_for(data_model_entity, column):
        # Column-dependent FQN: "relation_id" -> alias_fqn, while
        # "other_relation_id" would yield a different, detectable value.
        return f"quicksight_service.dataset.{column}"

    with patch(
        "metadata.ingestion.source.dashboard.quicksight.metadata.get_column_fqn",
        return_value=src_fqn,
    ):
        with patch.object(
            self.quicksight,
            "_get_data_model_column_fqn",
            side_effect=data_model_fqn_for,
        ):
            result = self.quicksight._build_column_lineage_from_parser(
                mock_parser, mock_from_entity, mock_data_model
            )

    # Only 1 result: the wrong table's column must be filtered out
    assert len(result) == 1
    assert result[0].fromColumns == [src_fqn]
    # The surviving entry must be the relation_table pair, not other_table's
    assert result[0].toColumn == alias_fqn
484+
@pytest.mark.order(12)
def test_build_column_lineage_no_fallback_when_parser_has_global_lineage(self):
    """
    Regression test for the multi-table fallback bug (Issue #26670).

    The parser reports column lineage, but only for ``users_table``, while
    ``from_entity`` is ``orders_table``. In that situation the method is
    expected to return an empty list without calling
    ``_get_column_lineage`` (the name-based fallback), since the fallback
    would manufacture incorrect cross-table column lineage.
    """
    # The single parser pair belongs to a table other than from_entity.
    foreign_source = MagicMock()
    foreign_source.raw_name = "user_id"
    foreign_source._parent = MagicMock()
    foreign_source._parent.__str__.return_value = "users_table"

    foreign_target = MagicMock()
    foreign_target.raw_name = "uid"

    # Non-empty lineage overall, yet nothing for 'orders_table'.
    lineage_parser = MagicMock()
    lineage_parser.column_lineage = [(foreign_source, foreign_target)]

    upstream_table = MagicMock()
    upstream_table.name.root = "orders_table"
    data_model = MagicMock()

    fallback_patch = patch.object(self.quicksight, "_get_column_lineage")
    with fallback_patch as name_based_fallback:
        lineage = self.quicksight._build_column_lineage_from_parser(
            lineage_parser, upstream_table, data_model
        )

    # The name-based fallback must stay untouched ...
    name_based_fallback.assert_not_called()
    # ... and no lineage may be manufactured.
    assert lineage == []
527+
@pytest.mark.order(13)
def test_build_column_lineage_from_parser_falls_back_when_empty(self):
    """
    An empty parser result delegates to name-based matching.

    With ``lineage_parser.column_lineage`` empty, the test checks that
    ``_build_column_lineage_from_parser`` calls ``_get_column_lineage``
    once with the upstream entity, the data model and the data model's
    column names, and returns that call's result unchanged.
    Issue #26670.
    """
    lineage_parser = MagicMock()
    lineage_parser.column_lineage = []

    upstream_table = MagicMock()
    upstream_table.name.root = "relation_table"

    # `name` is assigned after construction: MagicMock(name=...) would set
    # the mock's own display name instead of creating a `name` attribute.
    model_column = MagicMock()
    model_column.name = MagicMock()
    model_column.name.root = "col_a"

    data_model = MagicMock()
    data_model.columns = [model_column]

    expected_lineage = [MagicMock()]
    fallback_patch = patch.object(
        self.quicksight,
        "_get_column_lineage",
        return_value=expected_lineage,
    )
    with fallback_patch as name_based_fallback:
        lineage = self.quicksight._build_column_lineage_from_parser(
            lineage_parser, upstream_table, data_model
        )

    # The fallback receives the data model's column names.
    name_based_fallback.assert_called_once_with(
        upstream_table, data_model, ["col_a"]
    )
    # The fallback's list is handed back as-is (same object).
    assert lineage is expected_lineage
0 commit comments