Skip to content

Commit 22319eb

Browse files
committed
fix(starrocks): query INFORMATION_SCHEMA for table comments (#26692)
StarRocks DDL places COMMENT on a separate line, causing SQLAlchemy's MySQL parser to silently drop table descriptions. Override get_table_description() to fetch TABLE_COMMENT directly from INFORMATION_SCHEMA.TABLES instead.
1 parent 58a76d7 commit 22319eb

2 files changed

Lines changed: 91 additions & 13 deletions

File tree

ingestion/src/metadata/ingestion/source/database/starrocks/metadata.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
STARROCKS_GET_TABLE_NAMES,
4242
STARROCKS_PARTITION_DETAILS,
4343
STARROCKS_SHOW_FULL_COLUMNS,
44+
STARROCKS_TABLE_COMMENTS,
4445
)
4546
from metadata.utils.logger import ingestion_logger
4647
from metadata.utils.ssl_manager import SSLManager, check_ssl_and_init
@@ -269,26 +270,26 @@ def query_view_names_and_types(
269270

270271
return tables
271272

272-
@staticmethod
273273
def get_table_description(
274-
schema_name: str, table_name: str, inspector: Inspector
274+
self, schema_name: str, table_name: str, inspector: Inspector
275275
) -> Optional[str]:
276-
description = None
276+
# Fixes #26692: StarRocks formats SHOW CREATE TABLE with COMMENT on a
277+
# separate line, which SQLAlchemy's MySQL dialect parser cannot parse.
278+
# Query INFORMATION_SCHEMA.TABLES directly to get the table comment.
277279
try:
278-
table_info: dict = inspector.get_table_comment(table_name, schema_name)
280+
result = self.connection.execute(
281+
sql.text(STARROCKS_TABLE_COMMENTS),
282+
{"schema": schema_name, "table_name": table_name},
283+
)
284+
row = result.first()
285+
if row:
286+
return row[0] or None
279287
except Exception as exc: # pylint: disable=broad-except
280288
logger.debug(traceback.format_exc())
281289
logger.warning(
282290
f"Table description error for table [{schema_name}.{table_name}]: {exc}"
283291
)
284-
else:
285-
description = table_info.get("text")
286-
287-
if description is None:
288-
return None
289-
if isinstance(description, (list, tuple)) and len(description) > 0:
290-
return description[0]
291-
return description
292+
return None
292293

293294
def _get_columns(self, table_name, schema=None):
294295
"""Get column information and primary key columns of the specified table"""

ingestion/tests/unit/topology/database/test_starrocks.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"""
1515

1616
from unittest import TestCase
17-
from unittest.mock import patch
17+
from unittest.mock import MagicMock, patch
1818

1919
from metadata.generated.schema.metadataIngestion.workflow import (
2020
OpenMetadataWorkflowConfig,
@@ -135,3 +135,80 @@ def test_iceberg_relkind_mapping(self):
135135
from metadata.ingestion.source.database.starrocks.metadata import RELKIND_MAP
136136

137137
assert RELKIND_MAP["ICEBERG"] == TableType.Iceberg
138+
139+
140+
class TestStarRocksGetTableDescription(TestCase):
141+
"""Tests for get_table_description querying INFORMATION_SCHEMA directly (fixes #26692)"""
142+
143+
@patch(
144+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.test_connection"
145+
)
146+
def setUp(self, test_connection):
147+
test_connection.return_value = False
148+
self.config = OpenMetadataWorkflowConfig.model_validate(mock_starrocks_config)
149+
self.source = StarRocksSource.create(
150+
mock_starrocks_config["source"],
151+
self.config.workflowConfig.openMetadataServerConfig,
152+
)
153+
154+
@patch(
155+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.connection"
156+
)
157+
def test_returns_table_comment(self, mock_connection):
158+
mock_result = MagicMock()
159+
mock_result.first.return_value = ("审计日志表",)
160+
mock_connection.execute.return_value = mock_result
161+
self.source.connection = mock_connection
162+
163+
description = self.source.get_table_description(
164+
schema_name="test_db",
165+
table_name="audit_tbl",
166+
inspector=MagicMock(),
167+
)
168+
assert description == "审计日志表"
169+
170+
@patch(
171+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.connection"
172+
)
173+
def test_returns_none_for_empty_comment(self, mock_connection):
174+
mock_result = MagicMock()
175+
mock_result.first.return_value = ("",)
176+
mock_connection.execute.return_value = mock_result
177+
self.source.connection = mock_connection
178+
179+
description = self.source.get_table_description(
180+
schema_name="test_db",
181+
table_name="no_comment_tbl",
182+
inspector=MagicMock(),
183+
)
184+
assert description is None
185+
186+
@patch(
187+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.connection"
188+
)
189+
def test_returns_none_when_no_row(self, mock_connection):
190+
mock_result = MagicMock()
191+
mock_result.first.return_value = None
192+
mock_connection.execute.return_value = mock_result
193+
self.source.connection = mock_connection
194+
195+
description = self.source.get_table_description(
196+
schema_name="test_db",
197+
table_name="missing_tbl",
198+
inspector=MagicMock(),
199+
)
200+
assert description is None
201+
202+
@patch(
203+
"metadata.ingestion.source.database.common_db_source.CommonDbSourceService.connection"
204+
)
205+
def test_returns_none_on_exception(self, mock_connection):
206+
mock_connection.execute.side_effect = Exception("connection error")
207+
self.source.connection = mock_connection
208+
209+
description = self.source.get_table_description(
210+
schema_name="test_db",
211+
table_name="error_tbl",
212+
inspector=MagicMock(),
213+
)
214+
assert description is None

0 commit comments

Comments
 (0)