-
Notifications
You must be signed in to change notification settings - Fork 2.1k
fix(quicksight): resolve column aliases to correct upstream column in lineage (#26670) #27031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2d23162
8cac467
db4c661
abbfab2
f0e443a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -388,3 +388,78 @@ def describe_data_set_side_effect(**kwargs): | |
|
|
||
| col_names_b = {col.name.root for col in dm_b.columns} | ||
| assert col_names_b == {"email", "created_at"} | ||
|
|
||
| @pytest.mark.order(9) | ||
| def test_build_column_lineage_from_parser_uses_aliases(self): | ||
| """ | ||
| When a CustomSql query uses column aliases (SELECT src_col AS alias_col), | ||
| _build_column_lineage_from_parser must resolve the source column name and | ||
| map it to the aliased data model column, not drop the lineage pair. | ||
| """ | ||
| from unittest.mock import MagicMock | ||
|
|
||
| src_col = MagicMock() | ||
| src_col.raw_name = "original_col" | ||
|
|
||
| tgt_col = MagicMock() | ||
| tgt_col.raw_name = "alias_col" | ||
|
|
||
| mock_parser = MagicMock() | ||
| mock_parser.column_lineage = [(src_col, tgt_col)] | ||
|
|
||
| src_fqn = "db.schema.my_table.original_col" | ||
|
Comment on lines
+401
to
+410
|
||
| alias_fqn = "qs_service.dataset.alias_col" | ||
|
|
||
| mock_from_entity = MagicMock() | ||
| mock_data_model = MagicMock() | ||
|
|
||
| with patch( | ||
| "metadata.ingestion.source.dashboard.quicksight.metadata.get_column_fqn", | ||
| return_value=src_fqn, | ||
| ) as mock_get_col_fqn: | ||
| with patch.object( | ||
| self.quicksight, | ||
| "_get_data_model_column_fqn", | ||
| return_value=alias_fqn, | ||
| ) as mock_get_dm_col_fqn: | ||
| result = self.quicksight._build_column_lineage_from_parser( | ||
| mock_parser, mock_from_entity, mock_data_model | ||
| ) | ||
|
|
||
| mock_get_col_fqn.assert_called_once_with( | ||
| table_entity=mock_from_entity, column="original_col" | ||
| ) | ||
| mock_get_dm_col_fqn.assert_called_once_with( | ||
| data_model_entity=mock_data_model, column="alias_col" | ||
| ) | ||
| assert len(result) == 1 | ||
| assert result[0].fromColumns == [src_fqn] | ||
| assert result[0].toColumn == alias_fqn | ||
|
|
||
| @pytest.mark.order(10) | ||
| def test_build_column_lineage_from_parser_falls_back_when_no_parser_results(self): | ||
| """ | ||
| When lineage_parser.column_lineage is empty, _build_column_lineage_from_parser | ||
| must fall back to name-based matching via _get_column_lineage. | ||
| """ | ||
| mock_parser = MagicMock() | ||
| mock_parser.column_lineage = [] | ||
|
|
||
| fallback_lineage = [MagicMock()] | ||
| mock_from_entity = MagicMock() | ||
| mock_data_model = MagicMock() | ||
| mock_col = MagicMock() | ||
| mock_col.name = MagicMock(root="col_a") | ||
| mock_data_model.columns = [mock_col] | ||
|
|
||
| with patch.object( | ||
| self.quicksight, | ||
| "_get_column_lineage", | ||
| return_value=fallback_lineage, | ||
| ) as mock_get_col_lineage: | ||
| result = self.quicksight._build_column_lineage_from_parser( | ||
| mock_parser, mock_from_entity, mock_data_model | ||
| ) | ||
|
|
||
| mock_get_col_lineage.assert_called_once() | ||
| assert result is fallback_lineage | ||
|
Comment on lines
+392
to
+465
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_build_column_lineage_from_parser matches parsed columns to
from_entityusing onlysrc_col.raw_name. In multi-table CustomSql queries, different source tables often share column names (e.g.,id), so this can incorrectly attach a lineage pair to the wrongfrom_entity(becauseget_column_fqnwill happily resolveidon any table that has it). Consider filteringlineage_parser.column_lineageby the source column’s parent table (e.g.,src_col._parent) or by passing the current source table context into this method and usingget_column_fqn(..., table=..., schema=..., database=...)so only pairs belonging to the current upstream table are emitted.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@SaaiAravindhRaja this is a valid comment