Skip to content

Commit 433317a

Browse files
IceS2Copilot
andcommitted
fix(profiler): SAP HANA CREATE_TIME from SYS.TABLES + uppercase catalog matching (#27483)
* fix(profiler): SAP HANA CREATE_TIME sourced from SYS.TABLES and uppercase catalog matching HANA's SYS.M_TABLES lacks CREATE_TIME — query SYS.TABLES via CTE + LEFT JOIN. HANA catalog stores identifiers uppercase but OM stores lowercase — apply .upper() to WHERE filters. * fix checkstyle * fix(profiler): address PR review — null guard, tighter assertions, Black formatting - Guard against None schema/table name before .upper() in HANA compute - Tighten CTE assertion to check both M_TABLES and TABLES definitions - Make nonexistent-table test distinct by verifying uppercase in query - Apply Black formatting fixes * fix(profiler): tighten HANA CTE assertions and restore compute docstring Agent-Logs-Url: https://github.com/open-metadata/OpenMetadata/sessions/175f9aa0-30e5-49a2-84af-d4e76fb01a14 Co-authored-by: IceS2 <4912399+IceS2@users.noreply.github.com> * fix(profiler): normalize HANA SQL assertions for stable CTE checks Agent-Logs-Url: https://github.com/open-metadata/OpenMetadata/sessions/175f9aa0-30e5-49a2-84af-d4e76fb01a14 Co-authored-by: IceS2 <4912399+IceS2@users.noreply.github.com> * fix(profiler): add docstring and fallback to super().compute() for missing names - Restore docstring on HANA compute() for consistency with other computers - Fall back to super().compute() instead of None when schema/table name missing - Include schema/table values in warning log for debugging --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent ee147c7 commit 433317a

2 files changed

Lines changed: 171 additions & 10 deletions

File tree

ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -728,23 +728,64 @@ class SAPHanaTableMetricComputer(BaseTableMetricComputer):
728728
"""SAP HANA Table Metric Computer"""
729729

730730
def compute(self):
731-
"""compute table metrics for SAP HANA using SYS.M_TABLES"""
731+
"""Compute table metrics from SYS.M_TABLES and CREATE_TIME from SYS.TABLES."""
732+
if not self.schema_name or not self.table_name:
733+
logger.warning(
734+
"Missing schema or table name for HANA table metric computation. "
735+
"Falling back to base computation with schema_name=%r, table_name=%r",
736+
self.schema_name,
737+
self.table_name,
738+
)
739+
return super().compute()
740+
# HANA system catalog stores identifiers in uppercase
741+
schema_upper = self.schema_name.upper()
742+
table_upper = self.table_name.upper()
743+
744+
m_tables_cte = cte(
745+
self._build_query(
746+
[
747+
Column("SCHEMA_NAME"),
748+
Column("TABLE_NAME"),
749+
Column("RECORD_COUNT"),
750+
Column("TABLE_SIZE"),
751+
],
752+
self._build_table("M_TABLES", "SYS"),
753+
[
754+
Column("SCHEMA_NAME") == schema_upper,
755+
Column("TABLE_NAME") == table_upper,
756+
],
757+
)
758+
)
759+
760+
tables_cte = cte(
761+
self._build_query(
762+
[
763+
Column("SCHEMA_NAME"),
764+
Column("TABLE_NAME"),
765+
Column("CREATE_TIME"),
766+
],
767+
self._build_table("TABLES", "SYS"),
768+
[
769+
Column("SCHEMA_NAME") == schema_upper,
770+
Column("TABLE_NAME") == table_upper,
771+
],
772+
)
773+
)
774+
732775
columns = [
733776
Column("RECORD_COUNT").label(ROW_COUNT),
734777
Column("TABLE_SIZE").label(SIZE_IN_BYTES),
735778
Column("CREATE_TIME").label(CREATE_DATETIME),
736779
*self._get_col_names_and_count(),
737780
]
738781

739-
where_clause = [
740-
Column("SCHEMA_NAME") == self.schema_name,
741-
Column("TABLE_NAME") == self.table_name,
742-
]
743-
744-
query = self._build_query(
745-
columns,
746-
self._build_table("M_TABLES", "SYS"),
747-
where_clause,
782+
query = self._build_query(columns, m_tables_cte).join(
783+
tables_cte,
784+
and_(
785+
m_tables_cte.c.SCHEMA_NAME == tables_cte.c.SCHEMA_NAME,
786+
m_tables_cte.c.TABLE_NAME == tables_cte.c.TABLE_NAME,
787+
),
788+
isouter=True,
748789
)
749790

750791
res = self.runner._session.execute(query).first()

ingestion/tests/unit/profiler/test_table_metric_computer.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,20 @@ def test_compute_returns_result(self):
250250
assert result is mock_result
251251
assert result.rowCount == 2500
252252

253+
def test_compute_queries_create_time_from_sys_tables_not_m_tables(self):
254+
session = _build_mock_session()
255+
mock_result = MagicMock()
256+
mock_result.rowCount = 100
257+
session.execute.return_value.first.return_value = mock_result
258+
computer = _build_computer(session, SAPHanaTableMetricComputer)
259+
computer.compute()
260+
sql = str(session.execute.call_args[0][0].compile())
261+
assert (
262+
'"SYS"."TABLES"' in sql or "SYS.TABLES" in sql
263+
), "CREATE_TIME must come from SYS.TABLES, not SYS.M_TABLES"
264+
assert "CREATE_TIME" in sql
265+
assert "M_TABLES" in sql
266+
253267
def test_compute_returns_none_when_no_result(self):
254268
session = _build_mock_session()
255269
session.execute.return_value.first.return_value = None
@@ -289,3 +303,109 @@ def test_compute_returns_result_for_zero_row_count_regular_table(self):
289303
)
290304
result = computer.compute()
291305
assert result is mock_result
306+
307+
def test_compute_uppercases_schema_and_table_in_where_clause(self):
308+
"""MockModel has lowercase schema='test_schema' and table='test_table'.
309+
HANA catalog stores identifiers in uppercase — WHERE must use .upper()."""
310+
session = _build_mock_session()
311+
mock_result = MagicMock()
312+
mock_result.rowCount = 10
313+
session.execute.return_value.first.return_value = mock_result
314+
computer = _build_computer(session, SAPHanaTableMetricComputer)
315+
computer.compute()
316+
sql = str(
317+
session.execute.call_args[0][0].compile(
318+
compile_kwargs={"literal_binds": True}
319+
)
320+
)
321+
assert (
322+
"TEST_SCHEMA" in sql
323+
), f"WHERE clause must use uppercased schema name, got: {sql}"
324+
assert (
325+
"TEST_TABLE" in sql
326+
), f"WHERE clause must use uppercased table name, got: {sql}"
327+
assert (
328+
"test_schema" not in sql.split("FROM")[1] if "FROM" in sql else True
329+
), "Lowercase schema name must not appear in WHERE clauses"
330+
331+
def test_compute_returns_result_when_create_time_is_none(self):
332+
"""LEFT JOIN means CREATE_TIME can be NULL (table in M_TABLES but not TABLES).
333+
Should still return result — not fall back to base compute."""
334+
session = _build_mock_session()
335+
mock_result = MagicMock()
336+
mock_result.rowCount = 50
337+
mock_result.createDateTime = None
338+
session.execute.return_value.first.return_value = mock_result
339+
computer = _build_computer(session, SAPHanaTableMetricComputer)
340+
with patch.object(
341+
BaseTableMetricComputer, "compute", return_value="fallback"
342+
) as base_compute:
343+
result = computer.compute()
344+
assert result is mock_result
345+
base_compute.assert_not_called()
346+
347+
def test_compute_uses_two_ctes_with_left_join(self):
348+
"""Query must have two CTEs (M_TABLES + TABLES) joined with LEFT OUTER JOIN."""
349+
session = _build_mock_session()
350+
mock_result = MagicMock()
351+
mock_result.rowCount = 10
352+
session.execute.return_value.first.return_value = mock_result
353+
computer = _build_computer(session, SAPHanaTableMetricComputer)
354+
computer.compute()
355+
sql = str(
356+
session.execute.call_args[0][0].compile(
357+
compile_kwargs={"literal_binds": True}
358+
)
359+
)
360+
sql_upper = sql.upper()
361+
normalized_sql = " ".join(sql_upper.split())
362+
sql_without_quotes = normalized_sql.replace('"', "")
363+
assert "WITH " in normalized_sql, f"Expected WITH clause in query, got: {sql}"
364+
assert (
365+
sql_without_quotes.count(" AS (") >= 2
366+
), f"Expected two CTE definitions in query, got: {sql}"
367+
assert (
368+
"FROM SYS.M_TABLES" in sql_without_quotes
369+
), f"Expected M_TABLES source in query, got: {sql}"
370+
assert (
371+
"FROM SYS.TABLES" in sql_without_quotes
372+
), f"Expected TABLES source in query, got: {sql}"
373+
assert (
374+
"LEFT OUTER JOIN" in normalized_sql or "LEFT JOIN" in normalized_sql
375+
), f"TABLES CTE must be LEFT JOINed, got: {sql}"
376+
377+
def test_compute_returns_none_for_nonexistent_table(self):
378+
"""When table absent from HANA system views, compute returns None and
379+
still queries using uppercased identifiers expected by the catalog."""
380+
session = _build_mock_session()
381+
session.execute.return_value.first.return_value = None
382+
computer = _build_computer(session, SAPHanaTableMetricComputer)
383+
result = computer.compute()
384+
sql = str(
385+
session.execute.call_args[0][0].compile(
386+
compile_kwargs={"literal_binds": True}
387+
)
388+
)
389+
assert result is None
390+
assert (
391+
"TEST_SCHEMA" in sql
392+
), f"Nonexistent-table lookup must use uppercased schema, got: {sql}"
393+
assert (
394+
"TEST_TABLE" in sql
395+
), f"Nonexistent-table lookup must use uppercased table, got: {sql}"
396+
397+
def test_compute_includes_column_count_and_names(self):
398+
"""Result query must include columnCount and columnNames labels."""
399+
session = _build_mock_session()
400+
mock_result = MagicMock()
401+
mock_result.rowCount = 10
402+
session.execute.return_value.first.return_value = mock_result
403+
computer = _build_computer(session, SAPHanaTableMetricComputer)
404+
computer.compute()
405+
sql = str(
406+
session.execute.call_args[0][0].compile(
407+
compile_kwargs={"literal_binds": True}
408+
)
409+
)
410+
assert "columnCount" in sql, f"Query must select columnCount, got: {sql}"
411+
assert "columnNames" in sql, f"Query must select columnNames, got: {sql}"

0 commit comments

Comments
 (0)