databricks
diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py
Lines changed: 121 additions & 0 deletions
diff --git a/tests/test_local/e2e/test_complex_types.py b/tests/test_local/e2e/test_complex_types.py
Lines changed: 116 additions & 12 deletions
@@ -1,4 +1,7 @@
 import re
+from datetime import date, datetime, time
+from numbers import Number
+from uuid import UUID
 from sqlalchemy.sql import compiler, sqltypes
 import logging
 
@@ -165,6 +168,124 @@ def bindparam_string(self, name, **kw):
             return self._BIND_TEMPLATE % {"name": name.replace("`", "``")}
         return super().bindparam_string(name, **kw)
 
+    @staticmethod
+    def _split_multivalue_bind_name(bind_name):
+        """Parse a multi-row bind name shaped like ``<col>_m<idx>``.
+
+        Returns a ``(column, idx)`` tuple with ``idx`` converted to ``int``,
+        or ``None`` when ``bind_name`` does not end in ``_m<digits>`` preceded
+        by at least one character.
+        """
+        parsed = re.match(r"^(?P<col>.+)_m(?P<idx>\d+)$", bind_name)
+        if parsed is None:
+            return None
+        column, index = parsed.group("col", "idx")
+        return column, int(index)
+
+    @staticmethod
+    def _value_family(value):
+        """Classify a bound value into a coarse scalar family name.
+
+        Returns one of ``"null"``, ``"bool"``, ``"number"``, ``"string"``,
+        ``"binary"``, ``"temporal"`` or ``"uuid"``. Any other value (for
+        example a list or dict) yields ``None``, meaning non-scalar or
+        unsupported.
+        """
+        if value is None:
+            return "null"
+
+        # Checked in order; bool must precede Number because bool is an int
+        # subclass and would otherwise be classified as "number".
+        ordered_families = (
+            (bool, "bool"),
+            (Number, "number"),
+            (str, "string"),
+            ((bytes, bytearray, memoryview), "binary"),
+            ((date, time, datetime), "temporal"),
+            (UUID, "uuid"),
+        )
+        for accepted_types, family in ordered_families:
+            if isinstance(value, accepted_types):
+                return family
+        return None
+
+    @staticmethod
+    def _has_custom_bind_expression(type_engine):
+        """Report whether a type, or its ``impl``, overrides ``bind_expression``.
+
+        The class of ``type_engine`` is inspected first. If it still uses
+        ``sqltypes.TypeEngine.bind_expression``, the class of its ``impl``
+        attribute (when present and not ``None``) is inspected the same way.
+        """
+        default_hook = sqltypes.TypeEngine.bind_expression
+
+        def overrides_hook(candidate):
+            # Compare the attribute found on the class, not on the instance.
+            found = getattr(type(candidate), "bind_expression", None)
+            return found is not default_hook
+
+        if overrides_hook(type_engine):
+            return True
+
+        wrapped = getattr(type_engine, "impl", None)
+        return wrapped is not None and overrides_hook(wrapped)
+
+    def _build_multi_value_cast_plan(self, insert_stmt):
+        """Map multi-row VALUES bind names to the SQL type they are cast to.
+
+        Only bind groups that mix more than one scalar value family are cast.
+        Groups holding a non-scalar value, or a type with a custom bind
+        expression, are left untouched. Per the original author's note this
+        avoids breaking complex/custom bind types (e.g. ARRAY/MAP/VARIANT)
+        while still fixing Spark inline-table incompatibility for object
+        columns that mix primitive families (e.g. INT + STRING).
+
+        Returns ``{bind_name: cast_sql_type}``; the dict is empty when
+        ``insert_stmt`` has no truthy ``_multi_values`` attribute.
+        """
+        if not getattr(insert_stmt, "_multi_values", None):
+            return {}
+
+        # Bucket every ``<col>_m<idx>`` bind under its column name.
+        binds_by_column = {}
+        for name, param in self.binds.items():
+            parsed = self._split_multivalue_bind_name(name)
+            if parsed is not None:
+                binds_by_column.setdefault(parsed[0], []).append((name, param))
+
+        plan = {}
+        for entries in binds_by_column.values():
+            seen_families = set()
+            eligible = True
+
+            for _, param in entries:
+                family = self._value_family(getattr(param, "value", None))
+                if family is None:
+                    # A non-scalar value disqualifies the whole column group.
+                    eligible = False
+                    break
+                if family != "null":
+                    # NULLs are compatible with any family, so they are not counted.
+                    seen_families.add(family)
+
+                param_type = getattr(param, "type", None)
+                if param_type is not None and self._has_custom_bind_expression(
+                    param_type
+                ):
+                    eligible = False
+
+            # Nothing to reconcile unless at least two families are mixed.
+            if not eligible or len(seen_families) < 2:
+                continue
+
+            for name, param in entries:
+                param_type = getattr(param, "type", None)
+                if param_type is None or isinstance(param_type, sqltypes.NullType):
+                    continue
+
+                impl_type = param_type._unwrapped_dialect_impl(self.dialect)
+                plan[name] = self.dialect.type_compiler_instance.process(
+                    impl_type, identifier_preparer=self.preparer
+                )
+
+        return plan
+
+    def _apply_multi_value_casts(self, sql_text, insert_stmt):
+        """Wrap planned bind markers in ``CAST(<marker> AS <type>)``.
+
+        The cast plan comes from ``_build_multi_value_cast_plan``. Each planned
+        bind name is rendered through ``_BIND_TEMPLATE`` (with backticks in
+        the name doubled) and every occurrence of that marker in ``sql_text``
+        is replaced. With an empty plan ``sql_text`` is returned unchanged.
+        """
+        casts = self._build_multi_value_cast_plan(insert_stmt)
+        for name, sql_type in casts.items():
+            placeholder = self._BIND_TEMPLATE % {"name": name.replace("`", "``")}
+            sql_text = sql_text.replace(
+                placeholder, f"CAST({placeholder} AS {sql_type})"
+            )
+        return sql_text
+
+    def visit_insert(self, insert_stmt, **kw):
+        """Compile the INSERT via the parent class, then apply multi-row casts."""
+        return self._apply_multi_value_casts(
+            super().visit_insert(insert_stmt, **kw), insert_stmt
+        )
+
     def limit_clause(self, select, **kw):
         """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,
         since Databricks SQL doesn't support the latter.
 
@@ -11,7 +11,13 @@
     DateTime,
 )
 from collections.abc import Sequence
-from databricks.sqlalchemy import TIMESTAMP, TINYINT, DatabricksArray, DatabricksMap, DatabricksVariant
+from databricks.sqlalchemy import (
+    TIMESTAMP,
+    TINYINT,
+    DatabricksArray,
+    DatabricksMap,
+    DatabricksVariant,
+)
 from sqlalchemy.orm import DeclarativeBase, Session
 from sqlalchemy import select
 from datetime import date, datetime, time, timedelta, timezone
@@ -20,6 +26,7 @@
 import decimal
 import json
 
+
 class TestComplexTypes(TestSetup):
     def _parse_to_common_type(self, value):
         """
@@ -175,8 +182,8 @@ class VariantTable(Base):
                 "number": 123,
                 "boolean": True,
                 "array": [1, 2, 3],
-                "object": {"nested": "value"}
-            }
+                "object": {"nested": "value"},
+            },
         }
 
         return VariantTable, sample_data
@@ -239,6 +246,44 @@ def test_map_table_creation_pandas(self):
             df_result = pd.read_sql(stmt, engine)
             assert self._recursive_compare(df_result.iloc[0].to_dict(), sample_data)
 
+    def test_array_table_creation_pandas_multi(self):
+        """Round-trip two array rows written with ``to_sql(method="multi")``."""
+        table, sample_data = self.sample_array_table()
+        # The second row differs from the sample only in its int_col value.
+        expected_rows = [sample_data, sample_data | {"int_col": 2}]
+
+        with self.table_context(table) as engine:
+            pd.DataFrame(expected_rows).to_sql(
+                table.__tablename__,
+                engine,
+                if_exists="append",
+                index=False,
+                method="multi",
+            )
+
+            # Order by int_col so fetched rows line up with expected_rows.
+            fetched = pd.read_sql(select(table).order_by(table.int_col), engine)
+            for position, expected in enumerate(expected_rows):
+                actual = fetched.iloc[position].to_dict()
+                assert self._recursive_compare(actual, expected)
+
+    def test_map_table_creation_pandas_multi(self):
+        """Round-trip two map rows written with ``to_sql(method="multi")``."""
+        table, sample_data = self.sample_map_table()
+        # The second row differs from the sample only in its int_col value.
+        expected_rows = [sample_data, sample_data | {"int_col": 2}]
+
+        with self.table_context(table) as engine:
+            pd.DataFrame(expected_rows).to_sql(
+                table.__tablename__,
+                engine,
+                if_exists="append",
+                index=False,
+                method="multi",
+            )
+
+            # Order by int_col so fetched rows line up with expected_rows.
+            fetched = pd.read_sql(select(table).order_by(table.int_col), engine)
+            for position, expected in enumerate(expected_rows):
+                actual = fetched.iloc[position].to_dict()
+                assert self._recursive_compare(actual, expected)
+
     def test_insert_variant_table_sqlalchemy(self):
         table, sample_data = self.sample_variant_table()
 
@@ -253,7 +298,12 @@ def test_insert_variant_table_sqlalchemy(self):
             result = session.scalar(stmt)
             compare = {key: getattr(result, key) for key in sample_data.keys()}
             # Parse JSON values back to original format for comparison
-            for key in ['variant_simple_col', 'variant_nested_col', 'variant_array_col', 'variant_mixed_col']:
+            for key in [
+                "variant_simple_col",
+                "variant_nested_col",
+                "variant_array_col",
+                "variant_mixed_col",
+            ]:
                 if compare[key] is not None:
                     compare[key] = json.loads(compare[key])
 
@@ -263,26 +313,76 @@ def test_variant_table_creation_pandas(self):
         table, sample_data = self.sample_variant_table()
 
         with self.table_context(table) as engine:
-            
+
             df = pd.DataFrame([sample_data])
             dtype_mapping = {
                 "variant_simple_col": DatabricksVariant,
                 "variant_nested_col": DatabricksVariant,
                 "variant_array_col": DatabricksVariant,
-                "variant_mixed_col": DatabricksVariant
+                "variant_mixed_col": DatabricksVariant,
             }
-            df.to_sql(table.__tablename__, engine, if_exists="append", index=False, dtype=dtype_mapping)
-            
+            df.to_sql(
+                table.__tablename__,
+                engine,
+                if_exists="append",
+                index=False,
+                dtype=dtype_mapping,
+            )
+
             stmt = select(table)
             df_result = pd.read_sql(stmt, engine)
             result_dict = df_result.iloc[0].to_dict()
             # Parse JSON values back to original format for comparison
-            for key in ['variant_simple_col', 'variant_nested_col', 'variant_array_col', 'variant_mixed_col']:
+            for key in [
+                "variant_simple_col",
+                "variant_nested_col",
+                "variant_array_col",
+                "variant_mixed_col",
+            ]:
                 if result_dict[key] is not None:
                     result_dict[key] = json.loads(result_dict[key])
 
             assert result_dict == sample_data
 
+    def test_variant_table_creation_pandas_multi(self):
+        """Round-trip two variant rows written with ``to_sql(method="multi")``.
+
+        Non-None values read back from the variant columns are decoded with
+        ``json.loads`` before being compared against the inserted data.
+        """
+        table, sample_data = self.sample_variant_table()
+        variant_columns = (
+            "variant_simple_col",
+            "variant_nested_col",
+            "variant_array_col",
+            "variant_mixed_col",
+        )
+
+        with self.table_context(table) as engine:
+            second = sample_data | {"int_col": 2}
+            pd.DataFrame([sample_data, second]).to_sql(
+                table.__tablename__,
+                engine,
+                if_exists="append",
+                index=False,
+                dtype={column: DatabricksVariant for column in variant_columns},
+                method="multi",
+            )
+
+            # Order by int_col so row 0 is the sample and row 1 is ``second``.
+            fetched = pd.read_sql(select(table).order_by(table.int_col), engine)
+            first_row = fetched.iloc[0].to_dict()
+            second_row = fetched.iloc[1].to_dict()
+            for column in variant_columns:
+                for row in (first_row, second_row):
+                    if row[column] is not None:
+                        row[column] = json.loads(row[column])
+
+            assert first_row == sample_data
+            assert second_row == second
+
     def test_variant_literal_processor(self):
         table, sample_data = self.sample_variant_table()
 
@@ -291,8 +391,7 @@ def test_variant_literal_processor(self):
 
             try:
                 compiled = stmt.compile(
-                    dialect=engine.dialect,
-                    compile_kwargs={"literal_binds": True}
+                    dialect=engine.dialect, compile_kwargs={"literal_binds": True}
                 )
                 sql_str = str(compiled)
 
@@ -311,7 +410,12 @@ def test_variant_literal_processor(self):
             compare = {key: getattr(result, key) for key in sample_data.keys()}
 
             # Parse JSON values back to original Python objects
-            for key in ['variant_simple_col', 'variant_nested_col', 'variant_array_col', 'variant_mixed_col']:
+            for key in [
+                "variant_simple_col",
+                "variant_nested_col",
+                "variant_array_col",
+                "variant_mixed_col",
+            ]:
                 if compare[key] is not None:
                     compare[key] = json.loads(compare[key])