Skip to content

Commit fd757fd

Browse files
edg956 and claude
authored and committed
Fixes #24636: use test_metadata.kwargs['model'] to identify primary table for dbt test entity links (#27366)
* fix: use test_metadata.kwargs['model'] to identify primary table for entity links (issue #24636) For dbt relationship tests with multiple upstream dependencies, the order of tables in depends_on.nodes varies by database engine (Snowflake vs Unity Catalog). The primary table being tested is explicitly specified in test_metadata.kwargs['model'] for generic tests, making this a reliable order-independent way to identify the correct table for entity link generation. This fixes validation failures when columns exist in the primary table but not in the referenced table. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * Run formatter * Apply Gitar-bot comments --------- Co-authored-by: Claude <noreply@anthropic.com>
1 parent 7595932 commit fd757fd

3 files changed

Lines changed: 206 additions & 10 deletions

File tree

ingestion/src/metadata/ingestion/source/database/dbt/dbt_utils.py

Lines changed: 47 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -684,18 +684,55 @@ def get_manifest_column_name(manifest_node) -> Optional[str]:
684684

685685
def generate_entity_link(dbt_test):
    """
    Method returns entity link for dbt test cases.

    For test cases with multiple upstream dependencies (e.g., relationship tests)
    the table under test is identified from ``test_metadata.kwargs['model']`` of
    the manifest node instead of from its position in the upstream list, because
    that position is not stable across database engines (issue #24636).

    Args:
        dbt_test: mapping that holds the manifest node under
            ``DbtCommonEnum.MANIFEST_NODE.value`` and the list of upstream
            table FQNs under ``DbtCommonEnum.UPSTREAM.value``.

    Returns:
        A list with exactly one entity link (for the primary table), or an
        empty list when there is no usable upstream table.
    """
    manifest_node = dbt_test.get(DbtCommonEnum.MANIFEST_NODE.value)
    upstream_list = dbt_test.get(DbtCommonEnum.UPSTREAM.value, [])

    if not upstream_list:
        return []

    primary_table_fqn = None
    primary_table_name = _extract_primary_table_name(manifest_node)
    if primary_table_name:
        primary_table_fqn = _match_upstream_fqn(upstream_list, primary_table_name)

    # Fallback: no model reference could be parsed, or it matched none of the
    # upstream tables -> keep the historical "first upstream" behaviour.
    if not primary_table_fqn:
        primary_table_fqn = upstream_list[0]

    # The first upstream entry itself may be empty/None.
    if not primary_table_fqn:
        return []

    return [
        entity_link.get_entity_link(
            Table,
            fqn=primary_table_fqn,
            column_name=get_manifest_column_name(manifest_node),
        )
    ]


def _extract_primary_table_name(manifest_node) -> Optional[str]:
    """
    Return the table name referenced by ``test_metadata.kwargs['model']``,
    or None when the node carries no parsable ``ref(...)`` expression.
    """
    test_metadata = getattr(manifest_node, "test_metadata", None)
    kwargs = getattr(test_metadata, "kwargs", None)
    if not isinstance(kwargs, dict):
        return None

    model_str = kwargs.get("model", "")
    if not model_str:
        return None

    # Handles: ref('table'), ref("table"), ref('pkg', 'table'), ref("pkg", "table"),
    # optional whitespace inside the parentheses, and a trailing version
    # keyword such as ref('table', v=2) / ref('pkg', 'table', version=2).
    # The captured group is always the last positional (model name) argument.
    match = re.search(
        r"ref\(\s*(?:['\"][^'\"]+['\"]\s*,\s*)?['\"]([^'\"]+)['\"]\s*"
        r"(?:,\s*(?:v|version)\s*=[^)]*)?\)",
        str(model_str),
    )
    return match.group(1) if match else None


def _match_upstream_fqn(upstream_list, table_name) -> Optional[str]:
    """
    Return the first upstream FQN whose last segment is ``table_name``.

    An exact (case-sensitive) match is preferred; a case-insensitive match is
    tried only afterwards, so differently cased FQNs still resolve without
    ever overriding an exact hit.
    """
    candidates = [fqn for fqn in upstream_list if isinstance(fqn, str)]
    suffix = f".{table_name}"

    for fqn in candidates:
        if fqn.endswith(suffix):
            return fqn

    folded_suffix = suffix.casefold()
    for fqn in candidates:
        if fqn.casefold().endswith(folded_suffix):
            return fqn

    return None
699736

700737

701738
def get_dbt_compiled_query(mnode) -> Optional[str]:

ingestion/tests/unit/resources/datasets/manifest_test_node.json

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,87 @@
1111
"adapter_type": "redshift"
1212
},
1313
"nodes": {
14+
"test.jaffle_shop.relationships_un_rueckerstattungen_medis_base_PersonNr__ref_un_person_base_": {
15+
"test_metadata": {
16+
"name": "relationships",
17+
"kwargs": {
18+
"column_name": "PersonNr",
19+
"model": "{{ ref('un_rueckerstattungen_medis_base') }}",
20+
"to": "{{ ref('un_person_base') }}",
21+
"field": "PartnerNr"
22+
},
23+
"namespace": null
24+
},
25+
"compiled": true,
26+
"resource_type": "test",
27+
"depends_on": {
28+
"macros": [
29+
"macro.dbt.test_relationships"
30+
],
31+
"nodes": [
32+
"model.jaffle_shop.un_rueckerstattungen_medis_base",
33+
"model.jaffle_shop.un_person_base"
34+
]
35+
},
36+
"config": {
37+
"enabled": true,
38+
"alias": null,
39+
"schema": "dbt_test__audit",
40+
"database": null,
41+
"tags": [],
42+
"meta": {},
43+
"materialized": "test",
44+
"severity": "ERROR",
45+
"store_failures": null,
46+
"where": null,
47+
"limit": null
48+
},
49+
"database": "dev",
50+
"schema": "dbt_test__audit",
51+
"fqn": [
52+
"jaffle_shop",
53+
"relationships_un_rueckerstattungen_medis_base_PersonNr__ref_un_person_base_"
54+
],
55+
"unique_id": "test.jaffle_shop.relationships_un_rueckerstattungen_medis_base_PersonNr__ref_un_person_base_",
56+
"raw_code": "{{ test_relationships(**_dbt_generic_test_kwargs) }}",
57+
"language": "sql",
58+
"package_name": "jaffle_shop",
59+
"root_path": "/Users/onkarravgan/Desktop/project/jaffle_shop",
60+
"path": "relationships_test.sql",
61+
"original_file_path": "models/schema.yml",
62+
"name": "relationships_un_rueckerstattungen_medis_base_PersonNr__ref_un_person_base_",
63+
"alias": "relationships_test",
64+
"checksum": {
65+
"name": "none",
66+
"checksum": ""
67+
},
68+
"tags": [],
69+
"refs": [
70+
["un_rueckerstattungen_medis_base"],
71+
["un_person_base"]
72+
],
73+
"sources": [],
74+
"metrics": [],
75+
"description": "",
76+
"columns": {},
77+
"meta": {},
78+
"docs": {
79+
"show": true,
80+
"node_color": null
81+
},
82+
"patch_path": null,
83+
"compiled_path": "target/compiled/jaffle_shop/models/schema.yml/relationships_test.sql",
84+
"build_path": null,
85+
"deferred": false,
86+
"unrendered_config": {},
87+
"created_at": 1673982251.748683,
88+
"compiled_code": "select count(*) from table where PersonNr not in (select PartnerNr from other_table)",
89+
"extra_ctes_injected": true,
90+
"extra_ctes": [],
91+
"relation_name": null,
92+
"column_name": "PersonNr",
93+
"file_key_name": "models.un_rueckerstattungen_medis_base"
94+
},
1495
"test.jaffle_shop.unique_orders_order_id.fed79b3a6e": {
1596
"test_metadata": {
1697
"name": "unique",

ingestion/tests/unit/test_dbt.py

Lines changed: 78 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2841,6 +2841,84 @@ def fake_get_table_entity(table_fqn):
28412841
expected_fqn
28422842
], f"Expected lineage to resolve via target_schema 'snapshots', got: {result}"
28432843

2844+
def test_dbt_entity_link_with_mixed_case_columns_issue_24636(self):
    """
    Test for issue #24636: dbt test case ingestion fails with relationship tests.

    Root cause: dbt relationship tests have multiple upstream dependencies, but the
    order varies by database engine (Snowflake first, Unity Catalog last). Previously,
    code iterated over ALL upstream tables, causing column validation errors when
    the referenced table didn't have the same column names.

    Fix: Extract primary table from test_metadata.kwargs['model'] explicitly,
    which is order-independent and works across all database engines.

    Each scenario runs in its own subTest so one failure does not hide the others.
    """
    _, dbt_objects = self.get_dbt_object_files(
        mock_manifest=MOCK_SAMPLE_MANIFEST_TEST_NODE
    )
    nodes = dbt_objects.dbt_manifest.nodes

    relationship_node = nodes.get(
        "test.jaffle_shop.relationships_un_rueckerstattungen_medis_base_PersonNr__ref_un_person_base_"
    )
    # Fail loudly if the fixture entry is missing instead of silently
    # exercising the fallback path with a None node.
    self.assertIsNotNone(relationship_node)

    primary_fqn = "unity.catalog.schema.un_rueckerstattungen_medis_base"
    referenced_fqn = "unity.catalog.schema.un_person_base"

    # Scenarios 1 and 2: the primary table must be selected from
    # kwargs['model'] whether it comes first or last in the upstream list
    # (the ordering difference is the bug reported in #24636).
    upstream_orders = (
        ("primary table first", [primary_fqn, referenced_fqn]),
        ("primary table last", [referenced_fqn, primary_fqn]),
    )
    for label, upstream in upstream_orders:
        with self.subTest(upstream_order=label):
            dbt_test = {
                "manifest_node": relationship_node,
                "upstream": upstream,
                "results": "",
            }
            result = generate_entity_link(dbt_test=dbt_test)
            self.assertEqual(
                len(result),
                1,
                "Should return primary table regardless of upstream order",
            )
            self.assertIn("un_rueckerstattungen_medis_base", result[0])
            self.assertIn("::columns::PersonNr>", result[0])
            self.assertNotIn("un_person_base", result[0])

    # Scenario 3: fallback to the first upstream entry.
    # The upstream FQNs below are unrelated to the `unique_orders_order_id`
    # test node, so (assuming its kwargs['model'] names neither table) no
    # upstream FQN can be matched and the first entry is expected.
    with self.subTest(scenario="fallback to first upstream"):
        manifest_node_fallback = nodes.get(
            "test.jaffle_shop.unique_orders_order_id.fed79b3a6e"
        )
        dbt_test_fallback = {
            "manifest_node": manifest_node_fallback,
            "upstream": [
                "unity.catalog.schema.un_abrechnungsposition_cur",  # First table
                "unity.catalog.schema.un_person_base",  # Second table
            ],
            "results": "",
        }
        result_fallback = generate_entity_link(dbt_test=dbt_test_fallback)
        self.assertEqual(len(result_fallback), 1)
        self.assertIn("un_abrechnungsposition_cur", result_fallback[0])
2921+
28442922

28452923
class TestDownloadDbtFiles(TestCase):
28462924
"""

0 commit comments

Comments
 (0)