diff --git a/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py b/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
index ba8697241e04..289a35f0b9a3 100644
--- a/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
+++ b/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
@@ -793,7 +793,50 @@ class SAPHanaTableMetricComputer(BaseTableMetricComputer):
     """SAP HANA Table Metric Computer"""
 
     def compute(self):
-        """compute table metrics for SAP HANA using SYS.M_TABLES"""
+        """Compute table metrics from SYS.M_TABLES and CREATE_TIME from SYS.TABLES."""
+        if not self.schema_name or not self.table_name:
+            logger.warning(
+                "Missing schema or table name for HANA table metric computation. "
+                "Falling back to base computation with schema_name=%r, table_name=%r",
+                self.schema_name,
+                self.table_name,
+            )
+            return super().compute()
+        # HANA upper-cases unquoted identifiers in its catalog (quoted names keep case)
+        schema_upper = self.schema_name.upper()
+        table_upper = self.table_name.upper()
+
+        m_tables_cte = cte(
+            self._build_query(
+                [
+                    Column("SCHEMA_NAME"),
+                    Column("TABLE_NAME"),
+                    Column("RECORD_COUNT"),
+                    Column("TABLE_SIZE"),
+                ],
+                self._build_table("M_TABLES", "SYS"),
+                [
+                    Column("SCHEMA_NAME") == schema_upper,
+                    Column("TABLE_NAME") == table_upper,
+                ],
+            )
+        )
+
+        tables_cte = cte(
+            self._build_query(
+                [
+                    Column("SCHEMA_NAME"),
+                    Column("TABLE_NAME"),
+                    Column("CREATE_TIME"),
+                ],
+                self._build_table("TABLES", "SYS"),
+                [
+                    Column("SCHEMA_NAME") == schema_upper,
+                    Column("TABLE_NAME") == table_upper,
+                ],
+            )
+        )
+
         columns = [
             Column("RECORD_COUNT").label(ROW_COUNT),
             Column("TABLE_SIZE").label(SIZE_IN_BYTES),
@@ -801,15 +844,13 @@ def compute(self):
             *self._get_col_names_and_count(),
         ]
 
-        where_clause = [
-            Column("SCHEMA_NAME") == self.schema_name,
-            Column("TABLE_NAME") == self.table_name,
-        ]
-
-        query = self._build_query(
-            columns,
-            self._build_table("M_TABLES", "SYS"),
-            where_clause,
+        query = self._build_query(columns, m_tables_cte).join(
+            tables_cte,
+            and_(
+                m_tables_cte.c.SCHEMA_NAME == tables_cte.c.SCHEMA_NAME,
+                m_tables_cte.c.TABLE_NAME == tables_cte.c.TABLE_NAME,
+            ),
+            isouter=True,
         )
 
         res = self.runner._session.execute(query).first()
diff --git a/ingestion/tests/unit/observability/profiler/test_table_metric_computer.py b/ingestion/tests/unit/observability/profiler/test_table_metric_computer.py
index 569b51185212..0d6be63eec7c 100644
--- a/ingestion/tests/unit/observability/profiler/test_table_metric_computer.py
+++ b/ingestion/tests/unit/observability/profiler/test_table_metric_computer.py
@@ -252,6 +252,20 @@ def test_compute_returns_result(self):
         assert result is mock_result
         assert result.rowCount == 2500
 
+    def test_compute_queries_create_time_from_sys_tables_not_m_tables(self):
+        session = _build_mock_session()
+        mock_result = MagicMock()
+        mock_result.rowCount = 100
+        session.execute.return_value.first.return_value = mock_result
+        computer = _build_computer(session, SAPHanaTableMetricComputer)
+        computer.compute()
+        sql = str(session.execute.call_args[0][0].compile())
+        assert (
+            '"SYS"."TABLES"' in sql or "SYS.TABLES" in sql
+        ), "CREATE_TIME must come from SYS.TABLES, not SYS.M_TABLES"
+        assert "CREATE_TIME" in sql
+        assert "M_TABLES" in sql
+
     def test_compute_returns_none_when_no_result(self):
         session = _build_mock_session()
         session.execute.return_value.first.return_value = None
@@ -291,3 +305,109 @@ def test_compute_returns_result_for_zero_row_count_regular_table(self):
         )
         result = computer.compute()
         assert result is mock_result
+
+    def test_compute_uppercases_schema_and_table_in_where_clause(self):
+        """MockModel has lowercase schema='test_schema' and table='test_table'.
+        HANA catalog stores identifiers in uppercase — WHERE must use .upper()."""
+        session = _build_mock_session()
+        mock_result = MagicMock()
+        mock_result.rowCount = 10
+        session.execute.return_value.first.return_value = mock_result
+        computer = _build_computer(session, SAPHanaTableMetricComputer)
+        computer.compute()
+        sql = str(
+            session.execute.call_args[0][0].compile(
+                compile_kwargs={"literal_binds": True}
+            )
+        )
+        assert (
+            "TEST_SCHEMA" in sql
+        ), f"WHERE clause must use uppercased schema name, got: {sql}"
+        assert (
+            "TEST_TABLE" in sql
+        ), f"WHERE clause must use uppercased table name, got: {sql}"
+        assert (
+            "test_schema" not in sql
+        ), "Lowercase schema name must not appear in WHERE clauses"
+
+    def test_compute_returns_result_when_create_time_is_none(self):
+        """LEFT JOIN means CREATE_TIME can be NULL (table in M_TABLES but not TABLES).
+        Should still return result — not fall back to base compute."""
+        session = _build_mock_session()
+        mock_result = MagicMock()
+        mock_result.rowCount = 50
+        mock_result.createDateTime = None
+        session.execute.return_value.first.return_value = mock_result
+        computer = _build_computer(session, SAPHanaTableMetricComputer)
+        with patch.object(
+            BaseTableMetricComputer, "compute", return_value="fallback"
+        ) as base_compute:
+            result = computer.compute()
+        assert result is mock_result
+        base_compute.assert_not_called()
+
+    def test_compute_uses_two_ctes_with_left_join(self):
+        """Query must have two CTEs (M_TABLES + TABLES) joined with LEFT OUTER JOIN."""
+        session = _build_mock_session()
+        mock_result = MagicMock()
+        mock_result.rowCount = 10
+        session.execute.return_value.first.return_value = mock_result
+        computer = _build_computer(session, SAPHanaTableMetricComputer)
+        computer.compute()
+        sql = str(
+            session.execute.call_args[0][0].compile(
+                compile_kwargs={"literal_binds": True}
+            )
+        )
+        sql_upper = sql.upper()
+        normalized_sql = " ".join(sql_upper.split())
+        sql_without_quotes = normalized_sql.replace('"', "")
+        assert "WITH " in normalized_sql, f"Expected WITH clause in query, got: {sql}"
+        assert (
+            sql_without_quotes.count(" AS (") >= 2
+        ), f"Expected two CTE definitions in query, got: {sql}"
+        assert (
+            "FROM SYS.M_TABLES" in sql_without_quotes
+        ), f"Expected M_TABLES source in query, got: {sql}"
+        assert (
+            "FROM SYS.TABLES" in sql_without_quotes
+        ), f"Expected TABLES source in query, got: {sql}"
+        assert (
+            "LEFT OUTER JOIN" in normalized_sql or "LEFT JOIN" in normalized_sql
+        ), f"TABLES CTE must be LEFT JOINed, got: {sql}"
+
+    def test_compute_returns_none_for_nonexistent_table(self):
+        """When table absent from HANA system views, compute returns None and
+        still queries using uppercased identifiers expected by the catalog."""
+        session = _build_mock_session()
+        session.execute.return_value.first.return_value = None
+        computer = _build_computer(session, SAPHanaTableMetricComputer)
+        result = computer.compute()
+        sql = str(
+            session.execute.call_args[0][0].compile(
+                compile_kwargs={"literal_binds": True}
+            )
+        )
+        assert result is None
+        assert (
+            "TEST_SCHEMA" in sql
+        ), f"Nonexistent-table lookup must use uppercased schema, got: {sql}"
+        assert (
+            "TEST_TABLE" in sql
+        ), f"Nonexistent-table lookup must use uppercased table, got: {sql}"
+
+    def test_compute_includes_column_count_and_names(self):
+        """Result query must include columnCount and columnNames labels."""
+        session = _build_mock_session()
+        mock_result = MagicMock()
+        mock_result.rowCount = 10
+        session.execute.return_value.first.return_value = mock_result
+        computer = _build_computer(session, SAPHanaTableMetricComputer)
+        computer.compute()
+        sql = str(
+            session.execute.call_args[0][0].compile(
+                compile_kwargs={"literal_binds": True}
+            )
+        )
+        assert "columnCount" in sql, f"Query must select columnCount, got: {sql}"
+        assert "columnNames" in sql, f"Query must select columnNames, got: {sql}"