1818import json
1919import re
2020import traceback
21+ from dataclasses import dataclass
2122from datetime import datetime , timezone
2223from typing import Any , Iterable , Optional , Tuple
2324
8384logger = ingestion_logger ()
8485
8586
@dataclass(frozen=True)
class SASResourceContext:
    """Components extracted from a SAS Information Catalog resourceId.

    The SAS Data Tables REST API exposes table resources at paths of the form:

        /dataTables/dataSources/{provider}~fs~{host}~fs~{library}/tables/{table}

    where ``~fs~`` is the field separator (literal, not URL-encoded).

    Known provider values
    ---------------------
    - ``cas`` — CAS (Cloud Analytic Services) table. *host* is the CAS
      server name (e.g. ``cas-shared-default``).
    - ``Compute`` — SAS Compute session table. *host* is a session UUID
      (e.g. ``49736234-36b3-48d2-b2e2-e12aa365ce05``).

    Real-world examples
    -------------------
    CAS table:
        ``/dataTables/dataSources/cas~fs~cas-shared-default~fs~Samples/tables/WATER_CLUSTER``
    Compute table:
        ``/dataTables/dataSources/Compute~fs~49736234-…~fs~PUBLIC/tables/LAS_TRAIN``

    Reference
    ---------
    SAS REST API — Data Tables service:
    https://developer.sas.com/rest-apis/dataTables
    """

    # First field of the context segment (e.g. ``cas`` or ``Compute``).
    provider: str
    # Second field: CAS server name or Compute session UUID.
    host: str
    # Third field: the library name, used by callers as the schema name.
    library: str
    # The unmodified resourceId this context was parsed from.
    raw_resource_id: str

    @property
    def database_name(self) -> str:
        """Return the database name as ``<provider>.<host>``.

        No whitespace is inserted around the dot, so the value can be used
        directly as an entity name.
        """
        return f"{self.provider}.{self.host}"
125+
126+
# The field separator used inside the ``dataSources`` path segment.
_SAS_FIELD_SEPARATOR = "~fs~"


def parse_resource_id(resource_id: str) -> Optional[SASResourceContext]:
    """Parse a SAS Information Catalog resourceId into its components.

    Args:
        resource_id: Path of the form
            ``/dataTables/dataSources/{provider}~fs~{host}~fs~{library}/tables/{table}``.

    Returns:
        A ``SASResourceContext`` built from the first three ``~fs~``-separated
        fields of the fourth slash-delimited segment, or ``None`` (instead of
        raising) when the resourceId does not conform to the expected shape,
        so that callers can cleanly fall back to the relationships-based
        lookup.
    """
    # A null / non-string resourceId must not raise: the contract of this
    # function is "return None on anything unparseable".
    if not isinstance(resource_id, str):
        logger.warning(
            "resourceId %r is not a string; "
            "cannot extract provider/host/library.",
            resource_id,
        )
        return None

    segments = resource_id.split("/")
    # Expected: ['', 'dataTables', 'dataSources', '<context>', 'tables', ...]
    if len(segments) < 4:
        logger.warning(
            "resourceId %r has fewer than 4 slash-delimited segments; "
            "cannot extract provider/host/library.",
            resource_id,
        )
        return None

    context = segments[3]
    parts = context.split(_SAS_FIELD_SEPARATOR)
    if len(parts) < 3:
        logger.warning(
            "resourceId context segment %r has %d field(s) (expected 3: "
            "provider, host, library); cannot derive database/schema.",
            context,
            len(parts),
        )
        return None

    provider, host, library = parts[:3]
    # Empty fields (e.g. "cas~fs~~fs~Samples") would produce empty or
    # dangling-dot entity names; treat them as non-conforming so the caller
    # takes the fallback path instead.
    if not (provider and host and library):
        logger.warning(
            "resourceId context segment %r has an empty provider, host or "
            "library field; cannot derive database/schema.",
            context,
        )
        return None

    return SASResourceContext(
        provider=provider,
        host=host,
        library=library,
        raw_resource_id=resource_id,
    )
165+
166+
86167class SasSource (
87168 DatabaseServiceSource
88169): # pylint: disable=too-many-instance-attributes,too-many-public-methods
@@ -232,53 +313,77 @@ def create_database_alt(self, db):
232313
def create_database_schema(self, table):
    """
    Create database and schema entities for the given table.

    First attempts to derive provider/host/library from the table's
    ``resourceId`` via ``parse_resource_id``. If the resourceId does
    not match the expected SAS Data Tables shape, or the resulting
    create/update call fails with an ``HTTPError``, falls back to a
    relationships-based lookup through the Information Catalog.

    Side effects: on the primary path ``self.db_name`` and
    ``self.db_schema_name`` are set before any request is sent, so they
    keep the resourceId-derived values even if the request then fails.

    Returns the result of ``self.metadata.create_or_update`` for the
    schema request, or whatever
    ``_create_database_schema_from_relationships`` returns on fallback.
    """
    # ``dict.get`` only uses its default when the key is absent; an explicit
    # null resourceId would otherwise reach the parser as ``None``.
    resource_id = table.get("resourceId") or ""
    ctx = parse_resource_id(resource_id)

    if ctx is not None:
        try:
            self.db_name = ctx.database_name
            self.db_schema_name = ctx.library

            database = CreateDatabaseRequest(
                name=self.db_name,
                displayName=self.db_name,
                service=self.config.serviceName,
            )
            database = self.metadata.create_or_update(data=database)

            db_schema = CreateDatabaseSchemaRequest(
                name=self.db_schema_name, database=database.fullyQualifiedName
            )
            return self.metadata.create_or_update(db_schema)

        except HTTPError as exc:
            # Only HTTP failures trigger the fallback; anything else
            # propagates to the caller unchanged.
            logger.debug(
                "Falling back to relationships-based schema lookup for "
                "%s after HTTP error: %s",
                resource_id,
                exc,
            )

    return self._create_database_schema_from_relationships(table)
def _create_database_schema_from_relationships(self, table):
    """Derive database/schema from the table's catalog relationships.

    This is the fallback path when ``parse_resource_id`` returns
    ``None`` or the primary create fails. It looks for a
    ``dataStoreDataSets`` relationship to locate the parent data
    store, then uses ``create_database_alt`` for the database entity.

    Side effects: sets ``self.db_schema_name`` to the data store's name.

    Returns the result of ``self.metadata.create_or_update`` for the
    schema request, or ``None`` (after logging an error) when no usable
    ``dataStoreDataSets`` relationship is present on the table.
    """
    data_store_data_sets = "4b114f6e-1c2a-4060-9184-6809a612f27b"

    # ``relationships`` may be absent or explicitly null; both mean "none".
    relationships = table.get("relationships") or []

    # Take the endpoint of the first relationship with the expected
    # definition id. ``.get`` keeps partial relationship records from
    # raising KeyError; they are skipped or yield ``None`` instead.
    data_store_id = next(
        (
            relation.get("endpointId")
            for relation in relationships
            if relation.get("definitionId") == data_store_data_sets
        ),
        None,
    )

    if data_store_id is None:
        logger.error(
            "Failed to derive database schema for SAS table '%s' (resourceId=%s): "
            "missing data store identifier because the expected "
            "'dataStoreDataSets' relationship was not found.",
            table.get("name", "<unknown>"),
            table.get("resourceId", "<missing>"),
        )
        return None

    data_store = self.sas_client.get_instance(data_store_id)
    database = self.create_database_alt(data_store)
    self.db_schema_name = data_store["name"]
    db_schema = CreateDatabaseSchemaRequest(
        name=data_store["name"], database=database.fullyQualifiedName
    )
    return self.metadata.create_or_update(db_schema)
282387
283388 def create_columns_alt (self , table ):
284389 """
@@ -439,6 +544,7 @@ def create_table_entity(self, table) -> Iterable[Either[CreateTableRequest]]:
439544 global table_fqn
440545
441546 table_entity , table_fqn = None , None
547+ table_name = table .get ("name" ) if isinstance (table , dict ) else None
442548
443549 try :
444550 table_url = self .sas_client .get_information_catalog_link (table ["id" ])
@@ -506,10 +612,13 @@ def create_table_entity(self, table) -> Iterable[Either[CreateTableRequest]]:
506612 custom_attributes = [
507613 custom_attribute ["name" ] for custom_attribute in TABLE_CUSTOM_ATTR
508614 ]
615+ # Drop null values — OpenMetadata's custom-field types
616+ # (e.g. STRING_TYPE) reject null and fail the create with
617+ # "Custom field <name> has invalid JSON [$: null found, string expected]"
509618 extension_attributes = {
510619 attr : value
511620 for attr , value in table_extension .items ()
512- if attr in custom_attributes
621+ if attr in custom_attributes and value is not None
513622 }
514623
515624 table_request = CreateTableRequest (
@@ -529,6 +638,18 @@ def create_table_entity(self, table) -> Iterable[Either[CreateTableRequest]]:
529638 table_entity = self .metadata .get_by_name (
530639 entity = Table , fqn = self .get_table_fqn (table_name )
531640 )
641+ # If the table wasn't actually persisted (e.g. the sink
642+ # rejected the CreateTableRequest), skip the follow-up
643+ # patch/profile calls so we don't raise an AttributeError
644+ # that masks the real sink-side failure.
645+ if table_entity is None :
646+ logger .warning (
647+ f"Table [{ table_name } ] was not created in OpenMetadata; "
648+ "skipping description/extension/profile updates. "
649+ "Check the sink logs for the underlying error."
650+ )
651+ return
652+
532653 # update the description
533654 logger .debug (
534655 f"Updating description for { table_entity .id .root } with { table_description } "
@@ -595,10 +716,13 @@ def create_table_entity(self, table) -> Iterable[Either[CreateTableRequest]]:
595716
596717 except Exception as exc :
597718 logger .error (f"table failed to create: { table } " )
719+ error_name = table_name or (
720+ table .get ("id" ) if isinstance (table , dict ) else "unknown"
721+ )
598722 yield Either (
599723 left = StackTraceError (
600- name = table_name ,
601- error = f"Unexpected exception to create table [{ table_name } ]: { exc } " ,
724+ name = str ( error_name ) ,
725+ error = f"Unexpected exception to create table [{ error_name } ]: { exc } " ,
602726 stackTrace = traceback .format_exc (),
603727 )
604728 )
@@ -637,7 +761,7 @@ def create_lineage_table_source(self, table_extension, table_name):
637761 entity = Table , fqn = source_table_fqn
638762 )
639763
640- if source_table_entity :
764+ if source_table_entity and target_table_entity :
641765 yield from self .create_table_lineage (
642766 source_table_entity , target_table_entity
643767 )
0 commit comments