Skip to content

Commit 106e087

Browse files
authored
Merge pull request #60 from databricks/fix-hyphenated-column-bind-params
[ES-1867329] Quote bind parameter names with backticks
2 parents ad9eb35 + c9a8412 commit 106e087

2 files changed

Lines changed: 394 additions & 2 deletions

File tree

src/databricks/sqlalchemy/_ddl.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@ class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
1111
legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I)
1212

1313
def __init__(self, dialect):
14-
super().__init__(dialect, initial_quote="`")
14+
# ``escape_quote`` must match ``initial_quote`` so a literal
15+
# backtick inside a quoted identifier is doubled (``a``b`` —
16+
# per the ``BACKQUOTED_IDENTIFIER`` lexer rule in Spark SQL).
17+
# The default from SQLAlchemy is ``"`` which would escape the
18+
# wrong character, producing invalid DDL like ``a`b``.
19+
super().__init__(dialect, initial_quote="`", escape_quote="`")
1520

1621

1722
class DatabricksDDLCompiler(compiler.DDLCompiler):
@@ -84,6 +89,82 @@ def get_column_specification(self, column, **kwargs):
8489

8590

8691
class DatabricksStatementCompiler(compiler.SQLCompiler):
92+
"""Compiler that wraps every bind parameter marker in backticks.
93+
94+
Databricks named parameter markers only accept bare identifiers
95+
(``[A-Za-z_][A-Za-z0-9_]*``) unless backtick-quoted. DataFrame-origin
96+
column names frequently contain hyphens (``col-with-hyphen``), which
97+
SQLAlchemy would otherwise render as an invalid marker
98+
``:col-with-hyphen`` — the parser splits on ``-`` and reports
99+
UNBOUND_SQL_PARAMETER.
100+
101+
Wrapping every marker in backticks (``:`col-with-hyphen```) is valid
102+
for any identifier the Spark SQL grammar accepts, so we wrap
103+
unconditionally. The backticks are SQL-side quoting only — the
104+
parameter's logical name is the text between them, so the params
105+
dict sent to the driver keeps the original unquoted key.
106+
107+
Implementation: fix ``bindtemplate`` and ``compilation_bindtemplate``
108+
on the class. Every bind-render path in SQLAlchemy reads one of
109+
these two attributes (``bindparam_string``,
110+
``_literal_execute_expanding_parameter``, and the insertmanyvalues
111+
path which this dialect doesn't enable), so fixing them at the
112+
attribute level covers all paths with no method overrides. We use
113+
property descriptors with no-op setters because ``SQLCompiler.__init__``
114+
assigns the default templates from ``BIND_TEMPLATES[paramstyle]``
115+
during its own init — a plain class attribute would be shadowed by
116+
that instance assignment. The no-op setter silently discards super's
117+
assignment so our class-level value is always what gets read.
118+
"""
119+
120+
_BIND_TEMPLATE = ":`%(name)s`"
121+
122+
# The no-op setter makes ``SQLCompiler.__init__``'s assignment of the
123+
# default template a silent no-op so our class-level value is what
124+
# every render path reads. ``# type: ignore[assignment]`` is required
125+
# because super declares these as ``str``, and a ``property`` is a
126+
# different type at the static-analysis level (runtime behavior is
127+
# unchanged — the descriptor returns ``str`` on access).
128+
bindtemplate = property( # type: ignore[assignment]
129+
lambda self: self._BIND_TEMPLATE, lambda self, _: None
130+
)
131+
compilation_bindtemplate = property( # type: ignore[assignment]
132+
lambda self: self._BIND_TEMPLATE, lambda self, _: None
133+
)
134+
135+
def bindparam_string(self, name, **kw):
136+
# The template ``:`%(name)s``` assumes ``name`` is safe inside
137+
# backticks — any literal backtick must be doubled per the
138+
# ``BACKQUOTED_IDENTIFIER`` lexer rule. The doubling affects only
139+
# the rendered SQL; the params dict key sent to the driver stays
140+
# the single-backtick original (the server collapses `` -> `
141+
# when it parses the marker name).
142+
#
143+
# When a backtick is present, render the marker ourselves rather
144+
# than delegating to super. Super would otherwise also apply
145+
# ``bindname_escape_characters`` translation (``.``->``_``,
146+
# ``[``->``_``, etc.) AND set ``escaped_from``, which together
147+
# would propagate into ``escaped_bind_names`` and rewrite the
148+
# params-dict key. The original dict key uses a single backtick
149+
# and the un-translated form, so the rewrite would create a
150+
# mismatch with what the server expects when it parses the
151+
# backtick-quoted marker name. By owning the rendering here we
152+
# keep ``escaped_bind_names`` empty for these names and the dict
153+
# key passes through unchanged.
154+
if (
155+
"`" in name
156+
and not kw.get("escaped_from")
157+
and not kw.get("post_compile", False)
158+
):
159+
accumulate = kw.get("accumulate_bind_names")
160+
if accumulate is not None:
161+
accumulate.add(name)
162+
visited = kw.get("visited_bindparam")
163+
if visited is not None:
164+
visited.append(name)
165+
return self._BIND_TEMPLATE % {"name": name.replace("`", "``")}
166+
return super().bindparam_string(name, **kw)
167+
87168
def limit_clause(self, select, **kw):
88169
"""Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,
89170
since Databricks SQL doesn't support the latter.

0 commit comments

Comments
 (0)