Skip to content

Commit b09bebb

Browse files
committed
Fix multi-row CAST when bind names contain escape-map characters.
PR #68 wraps mixed-type multi-row INSERT binds with CAST(... AS STRING) by post-editing the rendered SQL. Marker reconstruction used the raw bind name from ``self.binds``, but SQLAlchemy renders the marker after applying ``bindname_escape_characters`` (space/./[/]/(/)/%/: get translated). For any column name containing one of those chars the two strings disagree, ``str.replace`` is a no-op, and the mixed-type insert fails again with ``INCOMPATIBLE_TYPES_IN_INLINE_TABLE`` (PECOBLR-2746 follow-up, Comment 21). Look up the escaped form via ``self.escaped_bind_names`` so the rebuilt marker matches what the compiler actually wrote. Backtick-containing names take a different path in ``bindparam_string`` that bypasses the escape map; the ``.replace("\`", "\`\`")`` doubling in marker rebuild matches that path too, so backticks alone, and backticks combined with escape-map chars, are handled by the same code. Tests cover every char in ``bindname_escape_characters`` plus literal and combined backtick cases, all at compile-only level (no warehouse). Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore <madhavendra.rathore@databricks.com>
1 parent b702520 commit b09bebb

2 files changed

Lines changed: 116 additions & 2 deletions

File tree

src/databricks/sqlalchemy/_ddl.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,14 +285,27 @@ def _build_multi_value_cast_plan(self, insert_stmt):
285285
return cast_plan
286286

287287
def _apply_multi_value_casts(self, sql_text, insert_stmt):
288-
"""Wrap selected ``:`name``` markers with ``CAST(... AS <type>)``."""
288+
"""Wrap selected ``:`name``` markers with ``CAST(... AS <type>)``.
289+
290+
``self.binds`` is keyed by the *raw* bind name (e.g.
291+
``'col with space_m0'``) but SQLAlchemy renders the marker using the
292+
*escaped* form after applying ``bindname_escape_characters``
293+
(space/./[/]/(/)/%/: → ``_`` etc., see ``compiler.py:bindparam_string``).
294+
The mapping is recorded in ``self.escaped_bind_names`` as
295+
``{original: escaped}``. We must look up the escaped form when
296+
reconstructing the marker — otherwise ``str.replace`` is a no-op for any
297+
column name containing an escaped character and no cast is applied,
298+
re-triggering the inline-table type incompatibility that this method
299+
exists to prevent (PECOBLR-2746 follow-up).
300+
"""
289301
cast_plan = self._build_multi_value_cast_plan(insert_stmt)
290302
if not cast_plan:
291303
return sql_text
292304

293305
rendered = sql_text
294306
for bind_name, target_type in cast_plan.items():
295-
marker = self._BIND_TEMPLATE % {"name": bind_name.replace("`", "``")}
307+
rendered_name = self.escaped_bind_names.get(bind_name, bind_name)
308+
marker = self._BIND_TEMPLATE % {"name": rendered_name.replace("`", "``")}
296309
rendered = rendered.replace(marker, f"CAST({marker} AS {target_type})")
297310
return rendered
298311

tests/test_local/test_ddl.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,3 +545,104 @@ def test_single_row_insert_does_not_render_casts(self):
545545

546546
sql = str(stmt.compile(bind=self.engine))
547547
assert "CAST(:`value` AS STRING)" not in sql
548+
549+
550+
class TestMultiRowInsertCastsEscapedBindNames(DDLTestBase):
551+
"""Regression tests for PECOBLR-2746 follow-up.
552+
553+
SQLAlchemy's ``bindname_escape_characters`` translates the chars space,
554+
``.``, ``[``, ``]``, ``(``, ``)``, ``%``, ``:`` in bind names to ``_`` etc.
555+
The cast pass keys off ``self.binds`` (raw names) but the rendered SQL uses
556+
the escaped form. If the cast pass doesn't look up the escaped name, the
557+
str.replace becomes a no-op and the mixed-type insert fails again. These
558+
tests pin the fix.
559+
"""
560+
561+
# Each pair is (column_name, expected_bind_token_inside_backticks).
562+
# The expected token mirrors SQLAlchemy's default bindname_escape_characters
563+
# map: space/./[/] → _, ( → A, ) → Z, % → P, : → C.
564+
_ESCAPED_NAMES = [
565+
("col with space", "col_with_space"),
566+
("col.dot", "col_dot"),
567+
("col[bracket]", "col_bracket_"),
568+
("col(paren)", "colAparenZ"),
569+
("col%pct", "colPpct"),
570+
("col:colon", "colCcolon"),
571+
]
572+
573+
@pytest.mark.parametrize("column_name,escaped_token", _ESCAPED_NAMES)
574+
def test_cast_renders_for_escape_char_column(self, column_name, escaped_token):
575+
metadata = MetaData()
576+
table = Table("t", metadata, Column(column_name, String()))
577+
stmt = insert(table).values(
578+
[{column_name: 1}, {column_name: 0}, {column_name: "NE"}]
579+
)
580+
581+
sql = str(stmt.compile(bind=self.engine))
582+
583+
for idx in range(3):
584+
marker = f":`{escaped_token}_m{idx}`"
585+
cast = f"CAST({marker} AS STRING)"
586+
assert cast in sql, (
587+
f"expected {cast!r} in compiled SQL for column "
588+
f"{column_name!r}, got:\n{sql}"
589+
)
590+
# And the bare marker must not appear standalone — every
591+
# occurrence of it must be inside the CAST(...).
592+
assert sql.count(marker) == sql.count(cast), (
593+
f"bare {marker!r} appears outside CAST(...) for column "
594+
f"{column_name!r}:\n{sql}"
595+
)
596+
597+
def test_cast_renders_for_backtick_column_name(self):
598+
"""Literal-backtick column: bindparam_string short-circuits super and
599+
doubles the backtick directly, so escaped_bind_names stays empty. Our
600+
`.get(bind_name, bind_name)` falls back to the raw name and the
601+
`.replace("`", "``")` in the marker rebuild reproduces the same
602+
doubling, so the marker matches and CAST wraps correctly.
603+
"""
604+
metadata = MetaData()
605+
table = Table("t", metadata, Column("col`tick", String()))
606+
stmt = insert(table).values(
607+
[{"col`tick": 1}, {"col`tick": 0}, {"col`tick": "NE"}]
608+
)
609+
610+
sql = str(stmt.compile(bind=self.engine))
611+
for idx in range(3):
612+
assert f"CAST(:`col``tick_m{idx}` AS STRING)" in sql, sql
613+
614+
def test_cast_renders_for_backtick_plus_escape_char(self):
615+
"""Both backtick and a default-escape-map char in the same column name.
616+
The backtick path bypasses super entirely (so the escape map never
617+
runs), and `.replace("`", "``")` doubles the backtick — the dot stays
618+
verbatim inside the backtick-quoted marker.
619+
"""
620+
metadata = MetaData()
621+
table = Table("t", metadata, Column("col`x.y", String()))
622+
stmt = insert(table).values([{"col`x.y": 1}, {"col`x.y": 0}, {"col`x.y": "NE"}])
623+
624+
sql = str(stmt.compile(bind=self.engine))
625+
for idx in range(3):
626+
assert f"CAST(:`col``x.y_m{idx}` AS STRING)" in sql, sql
627+
628+
def test_cast_renders_for_mixed_escape_chars_in_same_table(self):
629+
metadata = MetaData()
630+
table = Table(
631+
"t",
632+
metadata,
633+
Column("a b", String()),
634+
Column("c.d", String()),
635+
Column("e", String()),
636+
)
637+
stmt = insert(table).values(
638+
[
639+
{"a b": 1, "c.d": 1, "e": 1},
640+
{"a b": 0, "c.d": 0, "e": 0},
641+
{"a b": "NE", "c.d": "NE", "e": "NE"},
642+
]
643+
)
644+
645+
sql = str(stmt.compile(bind=self.engine))
646+
for token in ("a_b", "c_d", "e"):
647+
for idx in range(3):
648+
assert f"CAST(:`{token}_m{idx}` AS STRING)" in sql, sql

0 commit comments

Comments
 (0)