@@ -322,54 +322,77 @@ def _catalog_storage(self):
322322 self ._spark ._jvm .org .apache .spark .sql .catalyst .catalog .CatalogStorageFormat # pylint: disable=protected-access
323323 )
324324
325- @staticmethod
326- def _get_entity_storage_locations (table_metadata ):
327- """Obtain the entityStorageLocations property for table metadata, if the property is present."""
328- # This is needed because:
329- # - DBR 16.0 introduced entityStorageLocations as a property on table metadata, and this is required for
330- # as a constructor parameter for CatalogTable.
331- # - We need to be compatible with earlier versions of DBR.
332- # - The normal hasattr() check does not work with Py4J-based objects: it always returns True and non-existent
333- # methods will be automatically created on the proxy but fail when invoked.
334- # Instead the only approach is to use dir() to check if the method exists _prior_ to trying to access it.
335- # (After trying to access it, dir() will also include it even though it doesn't exist.)
336- return table_metadata .entityStorageLocations () if 'entityStorageLocations' in dir (table_metadata ) else None
337-
338325 def _convert_hms_table_to_external (self , src_table : Table ) -> bool :
339326 """Converts a Hive metastore table to external using Spark JVM methods."""
340327 logger .info (f"Changing HMS managed table { src_table .name } to External Table type." )
341328 inventory_table = self ._tables_crawler .full_name
329+ database = self ._spark ._jvm .scala .Some (src_table .database ) # pylint: disable=protected-access
342330 try :
343- database = self ._spark ._jvm .scala .Some (src_table .database ) # pylint: disable=protected-access
344331 table_identifier = self ._table_identifier (src_table .name , database )
345332 old_table = self ._catalog .getTableMetadata (table_identifier )
346- entity_storage_locations = self ._get_entity_storage_locations (old_table )
347- new_table = self ._catalog_table (
348- old_table .identifier (),
349- self ._catalog_type ('EXTERNAL' ),
350- old_table .storage (),
351- old_table .schema (),
352- old_table .provider (),
353- old_table .partitionColumnNames (),
354- old_table .bucketSpec (),
355- old_table .owner (),
356- old_table .createTime (),
357- old_table .lastAccessTime (),
358- old_table .createVersion (),
359- old_table .properties (),
360- old_table .stats (),
361- old_table .viewText (),
362- old_table .comment (),
363- old_table .unsupportedFeatures (),
364- old_table .tracksPartitionsInCatalog (),
365- old_table .schemaPreservesCase (),
366- old_table .ignoredProperties (),
367- old_table .viewOriginalText (),
368- # From DBR 16, there's a new constructor argument: entityStorageLocations (Seq[EntityStorageLocation])
369- # (We can't detect whether the argument is needed by the constructor, but assume that if the accessor
370- # is present on the source table then the argument is needed.)
371- * ([entity_storage_locations ] if entity_storage_locations is not None else []),
372- )
333+ # There are two alternative ways to create the new_table object: one for DBR 15 or older, one for DBR 16+.
334+ # Since we can't detect the DBR version from code, we check whether the collation
335+ # accessor is present on the source table metadata object.
336+ # This is needed because:
337+ # - DBR 16.0 introduced entityStorageLocations as a property on table metadata, and this is required
338+ # as a constructor parameter for CatalogTable.
339+ # - We need to be compatible with earlier versions of DBR.
340+ # - The normal hasattr() check does not work with Py4J-based objects: it always returns True and non-existent
341+ # methods will be automatically created on the proxy but fail when invoked.
342+ # Instead the only approach is to use dir() to check if the method exists _prior_ to trying to access it.
343+ # (After trying to access it, dir() will also include it even though it doesn't exist.)
344+ if 'collation' in dir (old_table ):
345+ logger .debug ("Detected Collation property on table metadata, assuming DBR16+" )
346+ new_table = self ._catalog_table (
347+ old_table .identifier (),
348+ self ._catalog_type ('EXTERNAL' ),
349+ old_table .storage (),
350+ old_table .schema (),
351+ old_table .provider (),
352+ old_table .partitionColumnNames (),
353+ old_table .bucketSpec (),
354+ old_table .owner (),
355+ old_table .createTime (),
356+ old_table .lastAccessTime (),
357+ old_table .createVersion (),
358+ old_table .properties (),
359+ old_table .stats (),
360+ old_table .viewText (),
361+ old_table .comment (),
362+ old_table .collation (),
363+ old_table .unsupportedFeatures (),
364+ old_table .tracksPartitionsInCatalog (),
365+ old_table .schemaPreservesCase (),
366+ old_table .ignoredProperties (),
367+ old_table .viewOriginalText (),
368+ old_table .entityStorageLocations (),
369+ old_table .resourceName (),
370+ )
371+ else :
372+ logger .debug ("No Collation property on table metadata, assuming DBR15 or older" )
373+ new_table = self ._catalog_table (
374+ old_table .identifier (),
375+ self ._catalog_type ('EXTERNAL' ),
376+ old_table .storage (),
377+ old_table .schema (),
378+ old_table .provider (),
379+ old_table .partitionColumnNames (),
380+ old_table .bucketSpec (),
381+ old_table .owner (),
382+ old_table .createTime (),
383+ old_table .lastAccessTime (),
384+ old_table .createVersion (),
385+ old_table .properties (),
386+ old_table .stats (),
387+ old_table .viewText (),
388+ old_table .comment (),
389+ old_table .unsupportedFeatures (),
390+ old_table .tracksPartitionsInCatalog (),
391+ old_table .schemaPreservesCase (),
392+ old_table .ignoredProperties (),
393+ old_table .viewOriginalText (),
394+ old_table .entityStorageLocations (),
395+ )
373396 self ._catalog .alterTable (new_table )
374397 self ._update_table_status (src_table , inventory_table )
375398 except Exception as e : # pylint: disable=broad-exception-caught
@@ -392,7 +415,6 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool:
392415 return False
393416 try :
394417 old_table = self ._catalog .getTableMetadata (table_identifier )
395- entity_storage_locations = self ._get_entity_storage_locations (old_table )
396418 table_location = old_table .storage ()
397419 new_location = self ._catalog_storage (
398420 self ._spark ._jvm .scala .Some ( # pylint: disable=protected-access
@@ -404,32 +426,58 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool:
404426 table_location .compressed (),
405427 table_location .properties (),
406428 )
407- new_table = self ._catalog_table (
408- old_table .identifier (),
409- old_table .tableType (),
410- new_location ,
411- old_table .schema (),
412- old_table .provider (),
413- old_table .partitionColumnNames (),
414- old_table .bucketSpec (),
415- old_table .owner (),
416- old_table .createTime (),
417- old_table .lastAccessTime (),
418- old_table .createVersion (),
419- old_table .properties (),
420- old_table .stats (),
421- old_table .viewText (),
422- old_table .comment (),
423- old_table .unsupportedFeatures (),
424- old_table .tracksPartitionsInCatalog (),
425- old_table .schemaPreservesCase (),
426- old_table .ignoredProperties (),
427- old_table .viewOriginalText (),
428- # From DBR 16, there's a new constructor argument: entityStorageLocations (Seq[EntityStorageLocation])
429- # (We can't detect whether the argument is needed by the constructor, but assume that if the accessor
430- # is present on the source table then the argument is needed.)
431- * ([entity_storage_locations ] if entity_storage_locations is not None else []),
432- )
429+ if 'collation' in dir (old_table ):
430+ logger .debug ("Detected Collation property on table metadata, assuming DBR16+" )
431+ new_table = self ._catalog_table (
432+ old_table .identifier (),
433+ old_table .tableType (),
434+ new_location ,
435+ old_table .schema (),
436+ old_table .provider (),
437+ old_table .partitionColumnNames (),
438+ old_table .bucketSpec (),
439+ old_table .owner (),
440+ old_table .createTime (),
441+ old_table .lastAccessTime (),
442+ old_table .createVersion (),
443+ old_table .properties (),
444+ old_table .stats (),
445+ old_table .viewText (),
446+ old_table .comment (),
447+ old_table .collation (),
448+ old_table .unsupportedFeatures (),
449+ old_table .tracksPartitionsInCatalog (),
450+ old_table .schemaPreservesCase (),
451+ old_table .ignoredProperties (),
452+ old_table .viewOriginalText (),
453+ old_table .entityStorageLocations (),
454+ old_table .resourceName (),
455+ )
456+ else :
457+ logger .debug ("No Collation property on table metadata, assuming DBR15 or older" )
458+ new_table = self ._catalog_table (
459+ old_table .identifier (),
460+ old_table .tableType (),
461+ new_location ,
462+ old_table .schema (),
463+ old_table .provider (),
464+ old_table .partitionColumnNames (),
465+ old_table .bucketSpec (),
466+ old_table .owner (),
467+ old_table .createTime (),
468+ old_table .lastAccessTime (),
469+ old_table .createVersion (),
470+ old_table .properties (),
471+ old_table .stats (),
472+ old_table .viewText (),
473+ old_table .comment (),
474+ old_table .unsupportedFeatures (),
475+ old_table .tracksPartitionsInCatalog (),
476+ old_table .schemaPreservesCase (),
477+ old_table .ignoredProperties (),
478+ old_table .viewOriginalText (),
479+ old_table .entityStorageLocations (),
480+ )
433481 self ._catalog .alterTable (new_table )
434482 except Exception as e : # pylint: disable=broad-exception-caught
435483 logger .warning (f"Error converting HMS table { src_table .name } to abfss: { e } " , exc_info = True )
0 commit comments