@@ -799,6 +799,28 @@ def test_get_create_table_request(self, mock_get_schema_fqn, mock_get_table_fqn)
799799 create_request .columns [i ].dataTypeDisplay , expected_type_display
800800 )
801801
@patch(
    "metadata.ingestion.source.pipeline.openlineage.metadata.OpenlineageSource._get_table_fqn_from_om"
)
@patch(
    "metadata.ingestion.source.pipeline.openlineage.metadata.OpenlineageSource._get_schema_fqn_from_om"
)
def test_get_create_table_request_schema_not_found_returns_none(
    self, mock_get_schema_fqn, mock_get_table_fqn
):
    """Return None, without raising, when neither the table nor its schema resolves.

    Both patched FQN lookups are made to raise ``FQNNotFoundException``; the
    test checks that ``get_create_table_request`` hands back ``None`` instead
    of letting the exception propagate.
    """
    # Decorators apply bottom-up, so the schema mock is the first argument.
    mock_get_table_fqn.side_effect = FQNNotFoundException("Table not found")
    mock_get_schema_fqn.side_effect = FQNNotFoundException("Schema not found")
    table_data = {
        "name": "unknown_schema.employees",
        "namespace": "bigquery",
        "facets": {},
    }

    result = self.open_lineage_source.get_create_table_request(table_data)

    # Use the unittest assertion rather than a bare ``assert`` so the check is
    # not stripped under ``python -O`` and follows the ``self.assert*`` style.
    self.assertIsNone(result)
823+
802824 @patch ("confluent_kafka.Consumer" )
803825 def test_get_pipelines_list_filters_complete_events (self , mock_consumer_class ):
804826 """Test that get_pipelines_list returns COMPLETE events"""
@@ -1689,16 +1711,18 @@ def test_yield_pipeline_lineage_topic_not_found_skips_gracefully(self):
16891711 mock_pipeline = Mock ()
16901712 mock_pipeline .id .root = pipeline_id
16911713
1692- with patch .object (
1693- self .open_lineage_source , "metadata"
1694- ) as mock_metadata , patch .object (
1695- self .open_lineage_source ,
1696- "_get_table_fqn" ,
1697- return_value = "db-service.public.some_table" ,
1698- ), patch .object (
1699- self .open_lineage_source ,
1700- "get_create_table_request" ,
1701- return_value = None ,
1714+ with (
1715+ patch .object (self .open_lineage_source , "metadata" ) as mock_metadata ,
1716+ patch .object (
1717+ self .open_lineage_source ,
1718+ "_get_table_fqn" ,
1719+ return_value = "db-service.public.some_table" ,
1720+ ),
1721+ patch .object (
1722+ self .open_lineage_source ,
1723+ "get_create_table_request" ,
1724+ return_value = None ,
1725+ ),
17021726 ):
17031727 # Empty messaging services list — no broker match for unknown-broker
17041728 mock_metadata .list_all_entities .return_value = iter ([])
@@ -2093,6 +2117,134 @@ def test_cleanup_handles_downstream_edges_scoped_to_event(self):
20932117 self .assertEqual (str (deleted_edge .toEntity .id .root ), table_b_id )
20942118 self .assertEqual (deleted_edge .toEntity .type , "table" )
20952119
def test_parse_glue_table_name_trino_glue_catalog_schema(self):
    """Parse a Glue-style name with a ``public`` schema and an underscore-separated table.

    This is the naming pattern described for Trino backed by the AWS Glue
    Data Catalog.
    """
    glue_name = "table/public/order_line_items"
    parsed = OpenlineageSource._parse_glue_table_name(glue_name)
    self.assertEqual(parsed.name, "order_line_items")
    self.assertEqual(parsed.schema, "public")
2129+
def test_parse_glue_table_name_happy_path(self):
    """A ``table/{database}/{table}`` name splits into schema and table (Naming.java GlueNaming)."""
    glue_name = "table/sales/users"
    parsed = OpenlineageSource._parse_glue_table_name(glue_name)
    self.assertEqual(parsed.name, "users")
    self.assertEqual(parsed.schema, "sales")
2135+
def test_parse_glue_table_name_normalizes_to_lowercase(self):
    """Mixed-case Glue database and table names come back lowercased."""
    mixed_case_name = "table/Sales/Users"
    parsed = OpenlineageSource._parse_glue_table_name(mixed_case_name)
    self.assertEqual(parsed.name, "users")
    self.assertEqual(parsed.schema, "sales")
2141+
def test_parse_glue_table_name_not_glue_format_returns_none(self):
    """A dotted name lacking the ``table/`` prefix is rejected with None."""
    parsed = OpenlineageSource._parse_glue_table_name("sales.users")
    self.assertIsNone(parsed)
2145+
def test_parse_glue_table_name_missing_table_part_returns_none(self):
    """A ``table/`` name carrying a single path segment is malformed and yields None."""
    parsed = OpenlineageSource._parse_glue_table_name("table/only_db")
    self.assertIsNone(parsed)
2149+
def test_parse_slash_table_name_happy_path(self):
    """A ``{database}/{table}`` name splits into schema and table (Naming.java KustoNaming)."""
    slash_name = "mydb/mytable"
    parsed = OpenlineageSource._parse_slash_table_name(slash_name)
    self.assertEqual(parsed.name, "mytable")
    self.assertEqual(parsed.schema, "mydb")
2155+
def test_parse_slash_table_name_normalizes_to_lowercase(self):
    """Mixed-case Kusto database and table names come back lowercased."""
    mixed_case_name = "MyDB/MyTable"
    parsed = OpenlineageSource._parse_slash_table_name(mixed_case_name)
    self.assertEqual(parsed.name, "mytable")
    self.assertEqual(parsed.schema, "mydb")
2161+
def test_parse_slash_table_name_single_part_returns_none(self):
    """A name with no slash has no db/table split to make, so the parser returns None."""
    parsed = OpenlineageSource._parse_slash_table_name("only_table")
    self.assertIsNone(parsed)
2165+
def test_parse_cosmos_table_name_happy_path(self):
    """Database comes from the ``/dbs/{db}`` namespace path, collection from ``colls/{coll}``.

    Naming reference: Naming.java CosmosNaming.
    """
    namespace = "azurecosmos://myaccount.documents.azure.com/dbs/mydb"
    collection_name = "colls/mycollection"
    parsed = OpenlineageSource._parse_cosmos_table_name(namespace, collection_name)
    self.assertEqual(parsed.name, "mycollection")
    self.assertEqual(parsed.schema, "mydb")
2174+
def test_parse_cosmos_table_name_normalizes_to_lowercase(self):
    """Mixed-case Cosmos database and collection names come back lowercased."""
    namespace = "azurecosmos://host/dbs/MyDB"
    collection_name = "colls/MyCollection"
    parsed = OpenlineageSource._parse_cosmos_table_name(namespace, collection_name)
    self.assertEqual(parsed.name, "mycollection")
    self.assertEqual(parsed.schema, "mydb")
2182+
def test_parse_cosmos_table_name_no_dbs_segment_returns_none(self):
    """Without a ``/dbs/{db}`` segment in the namespace there is no database name, so None is returned."""
    namespace = "azurecosmos://host"
    collection_name = "colls/mycoll"
    parsed = OpenlineageSource._parse_cosmos_table_name(namespace, collection_name)
    self.assertIsNone(parsed)
2190+
def test_parse_cosmos_table_name_non_colls_name_returns_none(self):
    """A dataset name that is not of the form ``colls/{collection}`` is rejected with None."""
    namespace = "azurecosmos://host/dbs/mydb"
    collection_name = "mycollection"
    parsed = OpenlineageSource._parse_cosmos_table_name(namespace, collection_name)
    self.assertIsNone(parsed)
2198+
def test_get_table_details_glue_namespace_parses_slash_name(self):
    """An ``arn:aws:glue`` namespace with a ``table/{db}/{table}`` name yields schema and table."""
    dataset = {
        "namespace": "arn:aws:glue:us-east-1:123456789012",
        "name": "table/sales/users",
    }
    details = OpenlineageSource._get_table_details(dataset)
    self.assertEqual(details.name, "users")
    self.assertEqual(details.schema, "sales")
2208+
def test_get_table_details_kusto_namespace_parses_slash_name(self):
    """An ``azurekusto`` namespace with a ``{db}/{table}`` name yields schema and table."""
    dataset = {
        "namespace": "azurekusto://mycluster.kusto.windows.net",
        "name": "mydb/mytable",
    }
    details = OpenlineageSource._get_table_details(dataset)
    self.assertEqual(details.name, "mytable")
    self.assertEqual(details.schema, "mydb")
2218+
def test_get_table_details_cosmos_namespace_parses_colls_name(self):
    """For Cosmos DB the database is read from the namespace path and the table from ``colls/{coll}``."""
    dataset = {
        "namespace": "azurecosmos://host.documents.azure.com/dbs/mydb",
        "name": "colls/orders",
    }
    details = OpenlineageSource._get_table_details(dataset)
    self.assertEqual(details.name, "orders")
    self.assertEqual(details.schema, "mydb")
2228+
def test_get_entity_details_glue_namespace_resolves_to_table(self):
    """A Glue ARN namespace with a ``table/{db}/{table}`` name resolves to a table entity."""
    dataset = {
        "namespace": "arn:aws:glue:us-east-1:123456789012",
        "name": "table/sales/users",
        "facets": {},
    }
    entity = OpenlineageSource._get_entity_details(dataset)
    self.assertIsNotNone(entity)
    self.assertEqual(entity.entity_type, "table")
    # The nested table details must carry the parsed schema and table names.
    self.assertEqual(entity.table_details.name, "users")
    self.assertEqual(entity.table_details.schema, "sales")
2241+
def test_get_entity_details_unparseable_name_raises_value_error(self):
    """A name in no recognised format makes ``_get_entity_details`` raise ValueError."""
    dataset = {"namespace": "trino://host:8080", "name": "invalidname"}
    # Callable form of assertRaises: invokes the function with ``dataset``.
    self.assertRaises(ValueError, OpenlineageSource._get_entity_details, dataset)
2247+
20962248
# Run the suite through unittest's runner when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
0 commit comments