Skip to content

Commit d1c6a4d

Browse files
authored
Add backticks to table name in mysql median function (#27406)
1 parent cc06887 commit d1c6a4d

3 files changed

Lines changed: 201 additions & 10 deletions

File tree

ingestion/src/metadata/profiler/orm/functions/median.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -207,11 +207,11 @@ def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
207207
{col},
208208
ROW_NUMBER() OVER () AS row_num
209209
FROM
210-
{table} AS median_inner,
210+
`{table}` AS median_inner,
211211
(SELECT @counter := COUNT(*)
212-
FROM {table} AS median_count
213-
WHERE median_count.{dimension_col} = {table}.{dimension_col}) t_count
214-
WHERE median_inner.{dimension_col} = {table}.{dimension_col}
212+
FROM `{table}` AS median_count
213+
WHERE median_count.{dimension_col} = `{table}`.{dimension_col}) t_count
214+
WHERE median_inner.{dimension_col} = `{table}`.{dimension_col}
215215
ORDER BY {col}
216216
) temp
217217
WHERE temp.row_num = ROUND({percentile} * @counter)
@@ -229,8 +229,8 @@ def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
229229
{col},
230230
ROW_NUMBER() OVER () AS row_num
231231
FROM
232-
{table},
233-
(SELECT @counter := COUNT(*) FROM {table}) t_count
232+
`{table}`,
233+
(SELECT @counter := COUNT(*) FROM `{table}`) t_count
234234
ORDER BY {col}
235235
) temp
236236
WHERE temp.row_num = ROUND({percentile} * @counter)
@@ -275,8 +275,8 @@ def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
275275
SELECT {col},
276276
ROW_NUMBER() OVER (ORDER BY {col}) as rn,
277277
COUNT(*) OVER () as cnt
278-
FROM {table} AS median_inner
279-
WHERE median_inner.{dimension_col} = {table}.{dimension_col}
278+
FROM "{table}" AS median_inner
279+
WHERE median_inner.{dimension_col} = "{table}".{dimension_col}
280280
AND {col} IS NOT NULL
281281
)
282282
WHERE rn IN (
@@ -292,13 +292,13 @@ def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
292292
return """
293293
(SELECT
294294
{col}
295-
FROM {table}
295+
FROM "{table}"
296296
WHERE {col} IS NOT NULL
297297
ORDER BY {col}
298298
LIMIT 1
299299
OFFSET (
300300
SELECT ROUND(COUNT(*) * {percentile} -1)
301-
FROM {table}
301+
FROM "{table}"
302302
WHERE {col} IS NOT NULL
303303
)
304304
)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright 2025 Collate
2+
# Licensed under the Collate Community License, Version 1.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6+
# Unless required by applicable law or agreed to in writing, software
7+
# distributed under the License is distributed on an "AS IS" BASIS,
8+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
# See the License for the specific language governing permissions and
10+
# limitations under the License.
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
# Copyright 2025 Collate
2+
# Licensed under the Collate Community License, Version 1.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6+
# Unless required by applicable law or agreed to in writing, software
7+
# distributed under the License is distributed on an "AS IS" BASIS,
8+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
# See the License for the specific language governing permissions and
10+
# limitations under the License.
11+
12+
"""
13+
Test MySQL median/quartile SQL generation with reserved word table names.
14+
15+
Issue: https://github.com/open-metadata/OpenMetadata/issues/26798
16+
When table names are MySQL reserved words (e.g., "Signal"), the generated SQL
17+
must escape them with backticks to avoid syntax errors.
18+
"""
19+
20+
import pytest
21+
from sqlalchemy import Column, Integer, String, create_engine
22+
from sqlalchemy.orm import DeclarativeBase
23+
24+
from metadata.profiler.orm.functions.median import MedianFn
25+
26+
27+
class Base(DeclarativeBase):
28+
pass
29+
30+
31+
class Signal(Base):
32+
"""Test table with reserved word name"""
33+
34+
__tablename__ = "Signal"
35+
id = Column(Integer, primary_key=True)
36+
customer_id = Column(String(50))
37+
value = Column(Integer)
38+
39+
40+
class TestMySQLMedianSQL:
41+
"""Test MySQL median SQL generation with reserved word table names"""
42+
43+
@pytest.fixture
44+
def mysql_engine(self):
45+
"""Create a MySQL engine for compilation testing"""
46+
# Using mysql+pymysql://localhost/test dialect for compilation
47+
# We don't need actual connection, just the dialect for SQL compilation
48+
engine = create_engine(
49+
"mysql+pymysql://", strategy="mock", executor=lambda *a, **kw: None
50+
)
51+
return engine
52+
53+
def test_median_with_reserved_word_table_name(self, mysql_engine):
54+
"""Test that table name is properly escaped with backticks"""
55+
col = Signal.customer_id
56+
table_name = "Signal" # Reserved word
57+
percentile = 0.5
58+
59+
# Create the MedianFn expression
60+
median_expr = MedianFn(col, table_name, percentile)
61+
62+
# Compile with MySQL dialect
63+
compiled = median_expr.compile(
64+
dialect=mysql_engine.dialect, compile_kwargs={"literal_binds": True}
65+
)
66+
sql_string = str(compiled)
67+
68+
# Verify table name is escaped with backticks
69+
assert "`Signal`" in sql_string, (
70+
f"Table name 'Signal' should be escaped with backticks.\n"
71+
f"Generated SQL: {sql_string}"
72+
)
73+
74+
# Verify that the unquoted "Signal" doesn't appear as a table reference
75+
# (it may appear in other contexts, but not as "FROM Signal," or "FROM Signal)")
76+
lines = sql_string.split("\n")
77+
for line in lines:
78+
# Check FROM clauses - they should have backticks
79+
if "FROM" in line and "Signal" in line and "Signal" not in "`Signal`":
80+
# This would be the problematic case: FROM Signal without backticks
81+
assert "`Signal`" in line, (
82+
f"FROM clause should have backticks around table name.\n"
83+
f"Line: {line}"
84+
)
85+
86+
def test_first_quartile_with_reserved_word_table_name(self, mysql_engine):
87+
"""Test that first quartile (Q1) works with reserved word table names"""
88+
col = Signal.customer_id
89+
table_name = "Signal"
90+
percentile = 0.25
91+
92+
median_expr = MedianFn(col, table_name, percentile)
93+
compiled = median_expr.compile(
94+
dialect=mysql_engine.dialect, compile_kwargs={"literal_binds": True}
95+
)
96+
sql_string = str(compiled)
97+
98+
assert "`Signal`" in sql_string, (
99+
f"Q1 (0.25): Table name should be escaped.\n" f"Generated SQL: {sql_string}"
100+
)
101+
102+
def test_third_quartile_with_reserved_word_table_name(self, mysql_engine):
103+
"""Test that third quartile (Q3) works with reserved word table names"""
104+
col = Signal.customer_id
105+
table_name = "Signal"
106+
percentile = 0.75
107+
108+
median_expr = MedianFn(col, table_name, percentile)
109+
compiled = median_expr.compile(
110+
dialect=mysql_engine.dialect, compile_kwargs={"literal_binds": True}
111+
)
112+
sql_string = str(compiled)
113+
114+
assert "`Signal`" in sql_string, (
115+
f"Q3 (0.75): Table name should be escaped.\n" f"Generated SQL: {sql_string}"
116+
)
117+
118+
def test_median_with_multiple_reserved_words(self, mysql_engine):
119+
"""Test with various MySQL reserved words as table names"""
120+
reserved_words = ["Signal", "Order", "Group", "Select", "Create", "Table"]
121+
percentile = 0.5
122+
123+
for table_name in reserved_words:
124+
col = Signal.customer_id
125+
median_expr = MedianFn(col, table_name, percentile)
126+
compiled = median_expr.compile(
127+
dialect=mysql_engine.dialect, compile_kwargs={"literal_binds": True}
128+
)
129+
sql_string = str(compiled)
130+
131+
expected_escaped = f"`{table_name}`"
132+
assert expected_escaped in sql_string, (
133+
f"Reserved word '{table_name}' should be escaped with backticks.\n"
134+
f"Generated SQL: {sql_string}"
135+
)
136+
137+
def test_column_name_properly_quoted(self, mysql_engine):
138+
"""Verify that column names are properly quoted by compiler.process()"""
139+
col = Signal.customer_id
140+
table_name = "Signal"
141+
percentile = 0.5
142+
143+
median_expr = MedianFn(col, table_name, percentile)
144+
compiled = median_expr.compile(
145+
dialect=mysql_engine.dialect, compile_kwargs={"literal_binds": True}
146+
)
147+
sql_string = str(compiled)
148+
149+
# Column name should be quoted (either backticks or other depending on compiler)
150+
# It should be present in the SQL
151+
assert "customer_id" in sql_string or "`customer_id`" in sql_string, (
152+
f"Column name should be present in generated SQL.\n"
153+
f"Generated SQL: {sql_string}"
154+
)
155+
156+
def test_no_cross_join_syntax_error(self, mysql_engine):
157+
"""Verify the generated SQL doesn't have the problematic comma-join pattern"""
158+
col = Signal.customer_id
159+
table_name = "Signal"
160+
percentile = 0.5
161+
162+
median_expr = MedianFn(col, table_name, percentile)
163+
compiled = median_expr.compile(
164+
dialect=mysql_engine.dialect, compile_kwargs={"literal_binds": True}
165+
)
166+
sql_string = str(compiled)
167+
168+
# The old problematic pattern was:
169+
# FROM Signal, (SELECT @counter := COUNT(*) FROM Signal) t_count
170+
# This would fail because Signal is a reserved word without backticks
171+
# With the fix, it should be:
172+
# FROM `Signal`, (SELECT ... FROM `Signal`) t_count
173+
174+
# Verify that if there's a FROM clause with Signal and a comma join,
175+
# the table name is escaped
176+
if "FROM" in sql_string and "," in sql_string:
177+
# Look for the pattern "FROM `table`," which is correct
178+
assert "FROM `Signal`" in sql_string or "FROM\n" in sql_string, (
179+
f"If using comma-join, table must be escaped.\n"
180+
f"Generated SQL: {sql_string}"
181+
)

0 commit comments

Comments
 (0)