Skip to content

Commit 878eb74

Browse files
committed
fix(starrocks): query INFORMATION_SCHEMA for table comments (#26692)
StarRocks DDL places COMMENT on a separate line, causing SQLAlchemy's MySQL parser to silently drop table descriptions. Override get_table_description() to fetch TABLE_COMMENT directly from INFORMATION_SCHEMA.TABLES instead.
1 parent 58a76d7 commit 878eb74

2 files changed

Lines changed: 89 additions & 13 deletions

File tree

ingestion/src/metadata/ingestion/source/database/starrocks/metadata.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
STARROCKS_GET_TABLE_NAMES,
4242
STARROCKS_PARTITION_DETAILS,
4343
STARROCKS_SHOW_FULL_COLUMNS,
44+
STARROCKS_TABLE_COMMENTS,
4445
)
4546
from metadata.utils.logger import ingestion_logger
4647
from metadata.utils.ssl_manager import SSLManager, check_ssl_and_init
@@ -269,26 +270,24 @@ def query_view_names_and_types(
269270

270271
return tables
271272

272-
@staticmethod
273+
# pylint: disable=arguments-differ
273274
def get_table_description(
274-
schema_name: str, table_name: str, inspector: Inspector
275+
self, schema_name: str, table_name: str, inspector: Inspector
275276
) -> Optional[str]:
276-
description = None
277277
try:
278-
table_info: dict = inspector.get_table_comment(table_name, schema_name)
278+
result = self.connection.execute(
279+
sql.text(STARROCKS_TABLE_COMMENTS),
280+
{"schema": schema_name, "table_name": table_name},
281+
)
282+
row = result.first()
283+
if row:
284+
return row[0] or None
279285
except Exception as exc: # pylint: disable=broad-except
280286
logger.debug(traceback.format_exc())
281287
logger.warning(
282288
f"Table description error for table [{schema_name}.{table_name}]: {exc}"
283289
)
284-
else:
285-
description = table_info.get("text")
286-
287-
if description is None:
288-
return None
289-
if isinstance(description, (list, tuple)) and len(description) > 0:
290-
return description[0]
291-
return description
290+
return None
292291

293292
def _get_columns(self, table_name, schema=None):
294293
"""Get column information and primary key columns of the specified table"""

ingestion/tests/unit/topology/database/test_starrocks.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"""
1515

1616
from unittest import TestCase
17-
from unittest.mock import patch
17+
from unittest.mock import MagicMock, patch
1818

1919
from metadata.generated.schema.metadataIngestion.workflow import (
2020
OpenMetadataWorkflowConfig,
@@ -135,3 +135,80 @@ def test_iceberg_relkind_mapping(self):
135135
from metadata.ingestion.source.database.starrocks.metadata import RELKIND_MAP
136136

137137
assert RELKIND_MAP["ICEBERG"] == TableType.Iceberg
138+
139+
140+
class TestStarRocksGetTableDescription(TestCase):
141+
"""Tests for get_table_description querying INFORMATION_SCHEMA directly (fixes #26692)"""
142+
143+
@patch(
144+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.test_connection"
145+
)
146+
def setUp(self, test_connection):
147+
test_connection.return_value = False
148+
self.config = OpenMetadataWorkflowConfig.model_validate(mock_starrocks_config)
149+
self.source = StarRocksSource.create(
150+
mock_starrocks_config["source"],
151+
self.config.workflowConfig.openMetadataServerConfig,
152+
)
153+
154+
@patch(
155+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.connection"
156+
)
157+
def test_returns_table_comment(self, mock_connection):
158+
mock_result = MagicMock()
159+
mock_result.first.return_value = ("审计日志表",)
160+
mock_connection.execute.return_value = mock_result
161+
self.source.connection = mock_connection
162+
163+
description = self.source.get_table_description(
164+
schema_name="test_db",
165+
table_name="audit_tbl",
166+
inspector=MagicMock(),
167+
)
168+
assert description == "审计日志表"
169+
170+
@patch(
171+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.connection"
172+
)
173+
def test_returns_none_for_empty_comment(self, mock_connection):
174+
mock_result = MagicMock()
175+
mock_result.first.return_value = ("",)
176+
mock_connection.execute.return_value = mock_result
177+
self.source.connection = mock_connection
178+
179+
description = self.source.get_table_description(
180+
schema_name="test_db",
181+
table_name="no_comment_tbl",
182+
inspector=MagicMock(),
183+
)
184+
assert description is None
185+
186+
@patch(
187+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.connection"
188+
)
189+
def test_returns_none_when_no_row(self, mock_connection):
190+
mock_result = MagicMock()
191+
mock_result.first.return_value = None
192+
mock_connection.execute.return_value = mock_result
193+
self.source.connection = mock_connection
194+
195+
description = self.source.get_table_description(
196+
schema_name="test_db",
197+
table_name="missing_tbl",
198+
inspector=MagicMock(),
199+
)
200+
assert description is None
201+
202+
@patch(
203+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.connection"
204+
)
205+
def test_returns_none_on_exception(self, mock_connection):
206+
mock_connection.execute.side_effect = Exception("connection error")
207+
self.source.connection = mock_connection
208+
209+
description = self.source.get_table_description(
210+
schema_name="test_db",
211+
table_name="error_tbl",
212+
inspector=MagicMock(),
213+
)
214+
assert description is None

0 commit comments

Comments
 (0)