Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -648,16 +648,23 @@
raise ValueError("No Database found in the context. We cannot run the table deletion.")
if self.source_config.markDeletedTables:
logger.info(f"Mark Deleted Tables set to True. Processing database [{self.context.get().database}]") # pyright: ignore[reportAttributeAccessIssue]
# Drain the global list so it stays bounded to one catalog's
# deletions instead of growing across the whole run.
with self._state_lock:
deleted_tables = list(
self.context.get_global().deleted_tables # pyright: ignore[reportAttributeAccessIssue]
)
self.context.get_global().deleted_tables.clear() # pyright: ignore[reportAttributeAccessIssue]
yield from delete_entity_by_name(
Comment on lines +651 to 658
self.metadata,
entity_type=Table,
entity_names=self.context.get_global().deleted_tables, # pyright: ignore[reportAttributeAccessIssue]
entity_names=deleted_tables,
recursive=self.source_config.markDeletedTables,
)
else:
yield from super().mark_tables_as_deleted()

def add_complex_datatype_descriptions(self, column: Column, column_json: ColumnJson):

Check failure on line 667 in ingestion/src/metadata/ingestion/source/database/unitycatalog/metadata.py

View check run for this annotation

SonarQubeCloud / [open-metadata-ingestion] SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 20 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=open-metadata-ingestion&issues=AZ6tHOWQchQzNLXl73BS&open=AZ6tHOWQchQzNLXl73BS&pullRequest=28648
"""
Method to add descriptions to complex datatypes
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,21 @@
import textwrap

UNITY_CATALOG_GET_CATALOGS_TAGS = """
SELECT * FROM `{database}`.information_schema.catalog_tags;
SELECT catalog_name, tag_name, tag_value FROM `{database}`.information_schema.catalog_tags;
"""

UNITY_CATALOG_GET_ALL_SCHEMA_TAGS = """
SELECT * FROM `{database}`.information_schema.schema_tags;
SELECT catalog_name, schema_name, tag_name, tag_value FROM `{database}`.information_schema.schema_tags;
"""

UNITY_CATALOG_GET_ALL_TABLE_TAGS = """
SELECT * FROM `{database}`.information_schema.table_tags WHERE schema_name = '{schema}';
SELECT catalog_name, schema_name, table_name, tag_name, tag_value
FROM `{database}`.information_schema.table_tags WHERE schema_name = '{schema}';
"""

UNITY_CATALOG_GET_ALL_TABLE_COLUMNS_TAGS = """
SELECT * FROM `{database}`.information_schema.column_tags WHERE schema_name = '{schema}';
SELECT catalog_name, schema_name, table_name, column_name, tag_name, tag_value
FROM `{database}`.information_schema.column_tags WHERE schema_name = '{schema}';
"""

UNITY_CATALOG_SQL_STATEMENT = textwrap.dedent(
Expand All @@ -52,37 +54,42 @@

UNITY_CATALOG_GET_TABLE_DDL = "SHOW CREATE TABLE `{database}`.`{schema}`.`{table}`"

UNITY_CATALOG_TABLE_LINEAGE = textwrap.dedent(
"""
SELECT
source_table_full_name,
target_table_full_name
FROM system.access.table_lineage
WHERE event_time >= current_date() - INTERVAL {query_log_duration} DAYS
AND source_table_full_name IS NOT NULL
AND target_table_full_name IS NOT NULL
GROUP BY source_table_full_name, target_table_full_name
"""
)

UNITY_CATALOG_COLUMN_LINEAGE = textwrap.dedent(
"""
UNITY_CATALOG_LINEAGE = textwrap.dedent(
"""
WITH column_pairs AS (
SELECT
source_table_full_name,
target_table_full_name,
collect_set(
struct(source_column_name AS u, target_column_name AS d)
) AS pairs
FROM system.access.column_lineage
WHERE event_time >= to_timestamp('{start_time}')
AND event_time < to_timestamp('{end_time}')
AND source_table_full_name IS NOT NULL
AND target_table_full_name IS NOT NULL
AND source_column_name IS NOT NULL
Comment thread
gitar-bot[bot] marked this conversation as resolved.
AND target_column_name IS NOT NULL
GROUP BY source_table_full_name, target_table_full_name
),
table_edges AS (
SELECT DISTINCT
source_table_full_name,
target_table_full_name
FROM system.access.table_lineage
WHERE event_time >= to_timestamp('{start_time}')
AND event_time < to_timestamp('{end_time}')
AND source_table_full_name IS NOT NULL
AND target_table_full_name IS NOT NULL
Comment thread
ulixius9 marked this conversation as resolved.
)
Comment thread
gitar-bot[bot] marked this conversation as resolved.
SELECT
source_table_full_name,
source_column_name,
target_table_full_name,
target_column_name
FROM system.access.column_lineage
WHERE event_time >= current_date() - INTERVAL {query_log_duration} DAYS
AND source_table_full_name IS NOT NULL
AND target_table_full_name IS NOT NULL
AND source_column_name IS NOT NULL
AND target_column_name IS NOT NULL
GROUP BY
source_table_full_name,
source_column_name,
target_table_full_name,
target_column_name
t.source_table_full_name AS source_table_full_name,
t.target_table_full_name AS target_table_full_name,
to_json(c.pairs) AS column_pairs
FROM table_edges t
LEFT JOIN column_pairs c
ON c.source_table_full_name = t.source_table_full_name
AND c.target_table_full_name = t.target_table_full_name
"""
)

Expand Down
Loading
Loading