@@ -11,7 +11,12 @@ class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
1111 legal_characters = re .compile (r"^[A-Z0-9_]+$" , re .I )
1212
1313 def __init__ (self , dialect ):
14- super ().__init__ (dialect , initial_quote = "`" )
14+ # ``escape_quote`` must match ``initial_quote`` so a literal
15+ # backtick inside a quoted identifier is doubled (``a``b`` —
16+ # per the ``BACKQUOTED_IDENTIFIER`` lexer rule in Spark SQL).
17+ # The default from SQLAlchemy is ``"`` which would escape the
18+ # wrong character, producing invalid DDL like ``a`b``.
19+ super ().__init__ (dialect , initial_quote = "`" , escape_quote = "`" )
1520
1621
1722class DatabricksDDLCompiler (compiler .DDLCompiler ):
@@ -84,6 +89,82 @@ def get_column_specification(self, column, **kwargs):
8489
8590
8691class DatabricksStatementCompiler (compiler .SQLCompiler ):
92+ """Compiler that wraps every bind parameter marker in backticks.
93+
94+ Databricks named parameter markers only accept bare identifiers
95+ (``[A-Za-z_][A-Za-z0-9_]*``) unless backtick-quoted. DataFrame-origin
96+ column names frequently contain hyphens (``col-with-hyphen``), which
97+ SQLAlchemy would otherwise render as an invalid marker
98+ ``:col-with-hyphen`` — the parser splits on ``-`` and reports
99+ UNBOUND_SQL_PARAMETER.
100+
101+ Wrapping every marker in backticks (``:`col-with-hyphen```) is valid
102+ for any identifier the Spark SQL grammar accepts, so we wrap
103+ unconditionally. The backticks are SQL-side quoting only — the
104+ parameter's logical name is the text between them, so the params
105+ dict sent to the driver keeps the original unquoted key.
106+
107+ Implementation: fix ``bindtemplate`` and ``compilation_bindtemplate``
108+ on the class. Every bind-render path in SQLAlchemy reads one of
109+ these two attributes (``bindparam_string``,
110+ ``_literal_execute_expanding_parameter``, and the insertmanyvalues
111+ path which this dialect doesn't enable), so fixing them at the
112+ attribute level covers all paths with no method overrides. We use
113+ property descriptors with no-op setters because ``SQLCompiler.__init__``
114+ assigns the default templates from ``BIND_TEMPLATES[paramstyle]``
115+ during its own init — a plain class attribute would be shadowed by
116+ that instance assignment. The no-op setter silently discards super's
117+ assignment so our class-level value is always what gets read.
118+ """
119+
120+ _BIND_TEMPLATE = ":`%(name)s`"
121+
122+ # The no-op setter makes ``SQLCompiler.__init__``'s assignment of the
123+ # default template a silent no-op so our class-level value is what
124+ # every render path reads. ``# type: ignore[assignment]`` is required
125+ # because super declares these as ``str``, and a ``property`` is a
126+ # different type at the static-analysis level (runtime behavior is
127+ # unchanged — the descriptor returns ``str`` on access).
128+ bindtemplate = property ( # type: ignore[assignment]
129+ lambda self : self ._BIND_TEMPLATE , lambda self , _ : None
130+ )
131+ compilation_bindtemplate = property ( # type: ignore[assignment]
132+ lambda self : self ._BIND_TEMPLATE , lambda self , _ : None
133+ )
134+
135+ def bindparam_string (self , name , ** kw ):
136+ # The template ``:`%(name)s``` assumes ``name`` is safe inside
137+ # backticks — any literal backtick must be doubled per the
138+ # ``BACKQUOTED_IDENTIFIER`` lexer rule. The doubling affects only
139+ # the rendered SQL; the params dict key sent to the driver stays
140+ # the single-backtick original (the server collapses `` -> `
141+ # when it parses the marker name).
142+ #
143+ # When a backtick is present, render the marker ourselves rather
144+ # than delegating to super. Super would otherwise also apply
145+ # ``bindname_escape_characters`` translation (``.``->``_``,
146+ # ``[``->``_``, etc.) AND set ``escaped_from``, which together
147+ # would propagate into ``escaped_bind_names`` and rewrite the
148+ # params-dict key. The original dict key uses a single backtick
149+ # and the un-translated form, so the rewrite would create a
150+ # mismatch with what the server expects when it parses the
151+ # backtick-quoted marker name. By owning the rendering here we
152+ # keep ``escaped_bind_names`` empty for these names and the dict
153+ # key passes through unchanged.
154+ if (
155+ "`" in name
156+ and not kw .get ("escaped_from" )
157+ and not kw .get ("post_compile" , False )
158+ ):
159+ accumulate = kw .get ("accumulate_bind_names" )
160+ if accumulate is not None :
161+ accumulate .add (name )
162+ visited = kw .get ("visited_bindparam" )
163+ if visited is not None :
164+ visited .append (name )
165+ return self ._BIND_TEMPLATE % {"name" : name .replace ("`" , "``" )}
166+ return super ().bindparam_string (name , ** kw )
167+
87168 def limit_clause (self , select , ** kw ):
88169 """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,
89170 since Databricks SQL doesn't support the latter.
0 commit comments