Promote Float(precision>24) to DOUBLE in CREATE TABLE

jayantsing-db · jayantsing-db · commit a4ba14c12c5c · 2026-05-25T13:34:01.000Z
SQLAlchemy's default visit_float drops the precision argument when rendering for Databricks (no FLOAT(p) form exists), so Float(precision=53) silently compiled to a 32-bit FLOAT column. pandas.DataFrame.to_sql maps float64 to Float(precision=53), so every to_sql round-trip of a float64 column was being permanently truncated at the CREATE TABLE step — no way to recover the lost bits later, even after the INSERT-side fix in databricks-sql-python v4.2.6. Add a @compiles(Float, "databricks") that promotes to DOUBLE when precision > 24, matching the SQL-standard cutover from single to double precision. Float() without precision keeps the current FLOAT behavior; sqlalchemy.types.FLOAT (uppercase, explicit 32-bit) and sqlalchemy.types.Double are unaffected because they have their own __visit_name__. Co-authored-by: Isaac Signed-off-by: Jayant Singh <jayant.singh@databricks.com>
diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py
@@ -88,6 +88,31 @@ def compile_numeric_databricks(type_, compiler, **kw):
     return compiler.visit_DECIMAL(type_, **kw)
 
 
+@compiles(sqlalchemy.types.Float, "databricks")
+def compile_float_databricks(type_, compiler, **kw):
+    """Render generic ``Float`` as ``FLOAT`` or ``DOUBLE`` for Databricks.
+
+    Databricks has no ``FLOAT(p)`` form: ``FLOAT`` is 32-bit and ``DOUBLE`` is
+    64-bit. Without this hook the precision argument is dropped, so
+    ``Float(precision=53)`` — what ``pandas.DataFrame.to_sql`` emits for
+    ``float64`` — became a 32-bit column and lost precision at
+    ``CREATE TABLE`` time, with no way to recover the bits afterwards.
+
+    Args:
+        type_: The ``Float`` instance being compiled; only ``precision`` is read.
+        compiler: Type compiler supplied by the ``@compiles`` hook (unused).
+        **kw: Extra compilation keywords (unused).
+
+    Returns:
+        ``"DOUBLE"`` when ``precision`` is set and greater than 24 (the
+        significand width of IEEE 754 binary32), otherwise ``"FLOAT"`` —
+        including ``Float()`` with no precision at all.
+    """
+    requested_bits = getattr(type_, "precision", None)
+    wants_double = requested_bits is not None and requested_bits > 24
+    return "DOUBLE" if wants_double else "FLOAT"
+
+
 @compiles(sqlalchemy.types.DateTime, "databricks")
 def compile_datetime_databricks(type_, compiler, **kw):
     """
diff --git a/tests/test_local/test_types.py b/tests/test_local/test_types.py
@@ -171,6 +171,66 @@ def test_array_string_renders_as_array_of_string(self):
         )
 
 
+class TestFloatPrecisionPromotion(CompilationTestBase):
+    """Pin how the generic ``Float`` type compiles for the Databricks dialect.
+
+    Background: Databricks ``FLOAT`` is 32-bit and ``DOUBLE`` is 64-bit, and
+    the precision argument used to be dropped when rendering, so
+    ``Float(precision=53)`` (emitted by ``pandas.DataFrame.to_sql`` for
+    ``float64`` columns) produced a 32-bit ``FLOAT`` column.
+
+    Expected behavior: ``Float`` renders as ``DOUBLE`` when ``precision > 24``
+    and as ``FLOAT`` otherwise; ``FLOAT`` and ``Double`` are untouched.
+    """
+
+    def test_float_with_no_precision_remains_float(self):
+        """A bare ``Float()`` carries no precision and must stay ``FLOAT``."""
+        bare_float = sqlalchemy.types.Float()
+        self._assert_compiled_value_explicit(bare_float, "FLOAT")
+
+    def test_float_at_single_precision_boundary_remains_float(self):
+        """24 bits still fits IEEE 754 single precision, so no promotion."""
+        boundary_float = sqlalchemy.types.Float(precision=24)
+        self._assert_compiled_value_explicit(boundary_float, "FLOAT")
+
+    def test_float_above_single_precision_boundary_promotes_to_double(self):
+        """25 bits is the first precision that must render as ``DOUBLE``."""
+        promoted_float = sqlalchemy.types.Float(precision=25)
+        self._assert_compiled_value_explicit(promoted_float, "DOUBLE")
+
+    def test_float_precision_53_promotes_to_double(self):
+        """``Float(precision=53)`` is what pandas ``to_sql`` uses for ``float64``."""
+        float64_type = sqlalchemy.types.Float(precision=53)
+        self._assert_compiled_value_explicit(float64_type, "DOUBLE")
+
+    def test_uppercase_float_with_high_precision_stays_float(self):
+        """Uppercase ``FLOAT`` is never promoted, whatever its precision.
+
+        ``sqlalchemy.types.FLOAT`` is the explicit 32-bit type, so asking for
+        it by name must yield ``FLOAT`` even when a high precision is passed.
+        """
+        explicit_float = sqlalchemy.types.FLOAT(precision=53)
+        self._assert_compiled_value_explicit(explicit_float, "FLOAT")
+
+    def test_double_is_unaffected_by_float_compiler(self):
+        """``Double`` has its own ``__visit_name__`` (``'double'``), so the
+        ``@compiles(Float)`` hook must not change how it renders."""
+        plain_double = sqlalchemy.types.Double()
+        self._assert_compiled_value_explicit(plain_double, "DOUBLE")
+        sized_double = sqlalchemy.types.Double(precision=53)
+        self._assert_compiled_value_explicit(sized_double, "DOUBLE")
+
+    def test_create_table_with_float64_emits_double_column(self):
+        """Compile full ``CREATE TABLE`` DDL for a ``Float(precision=53)`` column."""
+        from sqlalchemy.schema import CreateTable
+
+        value_column = Column("value", sqlalchemy.types.Float(precision=53))
+        table = Table("df", MetaData(), value_column)
+        rendered = str(CreateTable(table).compile(dialect=self.dialect))
+        assert "value DOUBLE" in rendered
+        assert "value FLOAT" not in rendered
+
+
 class TestDatabricksUUID:
     """Regression coverage for github.com/databricks/databricks-sqlalchemy/issues/50.