Skip to content

Commit a4ba14c

Browse files
committed
Promote Float(precision>24) to DOUBLE in CREATE TABLE
SQLAlchemy's default visit_float drops the precision argument when rendering for Databricks (no FLOAT(p) form exists), so Float(precision=53) silently compiled to a 32-bit FLOAT column. pandas.DataFrame.to_sql maps float64 to Float(precision=53), so every to_sql round-trip of a float64 column was being permanently truncated at the CREATE TABLE step — there was no way to recover the lost bits later, even after the INSERT-side fix in databricks-sql-python v4.2.6.

Add a @compiles(Float, "databricks") that promotes to DOUBLE when precision > 24, matching the SQL-standard cutover from single to double precision. Float() without precision keeps the current FLOAT behavior; sqlalchemy.types.FLOAT (uppercase, explicit 32-bit) and sqlalchemy.types.Double are unaffected because they have their own __visit_name__.

Co-authored-by: Isaac
Signed-off-by: Jayant Singh <jayant.singh@databricks.com>
1 parent 3ab18ed commit a4ba14c

2 files changed

Lines changed: 85 additions & 0 deletions

File tree

src/databricks/sqlalchemy/_types.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,31 @@ def compile_numeric_databricks(type_, compiler, **kw):
8888
return compiler.visit_DECIMAL(type_, **kw)
8989

9090

91+
@compiles(sqlalchemy.types.Float, "databricks")
def compile_float_databricks(type_, compiler, **kw):
    """Render a generic ``Float`` as ``FLOAT`` or ``DOUBLE`` for Databricks.

    Databricks has no ``FLOAT(p)`` form: ``FLOAT`` is 32-bit (~7 significant
    digits) and ``DOUBLE`` is 64-bit (~15-17 significant digits). Because the
    default ``visit_float`` rendering discards the precision argument for this
    dialect, ``Float(precision=53)`` used to come out as a 32-bit ``FLOAT``.
    That is exactly the type ``pandas.DataFrame.to_sql`` uses for ``float64``
    columns, so the column was narrowed at ``CREATE TABLE`` time and the lost
    bits could not be recovered afterwards, even once the INSERT path was
    fixed in databricks-sql-python v4.2.6.

    The cutover follows the SQL standard convention: ``FLOAT(p)`` with
    ``p <= 24`` is single precision (the 24-bit significand of IEEE 754
    binary32) and ``p > 24`` is double precision.

    ``type_`` is the type object being compiled; only its ``precision``
    attribute is consulted. ``compiler`` and ``**kw`` are accepted to satisfy
    the compiler-hook signature and are not used.

    Returns ``"DOUBLE"`` when a precision greater than 24 was requested, and
    ``"FLOAT"`` otherwise, including when no precision was given at all.
    """
    # A missing or unset precision is treated as "no preference" and therefore
    # keeps the pre-existing FLOAT rendering.
    requested_bits = getattr(type_, "precision", None)
    needs_double = requested_bits is not None and requested_bits > 24
    return "DOUBLE" if needs_double else "FLOAT"
114+
115+
91116
@compiles(sqlalchemy.types.DateTime, "databricks")
92117
def compile_datetime_databricks(type_, compiler, **kw):
93118
"""

tests/test_local/test_types.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,66 @@ def test_array_string_renders_as_array_of_string(self):
171171
)
172172

173173

174+
class TestFloatPrecisionPromotion(CompilationTestBase):
    """Guard against ``float64`` columns being narrowed by pandas ``to_sql``.

    On Databricks ``FLOAT`` is a 32-bit type and ``DOUBLE`` a 64-bit one.
    The default ``visit_float`` rendering discards the precision argument for
    this dialect, so ``Float(precision=53)`` (the type that
    ``pandas.DataFrame.to_sql`` emits for ``float64`` columns) used to be
    created as a 32-bit ``FLOAT`` column.

    The dialect now renders ``Float`` as ``DOUBLE`` whenever
    ``precision > 24``, the SQL standard boundary between single and double
    precision. These tests pin both sides of that boundary and the types
    that must stay untouched.
    """

    def test_float_with_no_precision_remains_float(self):
        """A bare ``Float()`` requests no precision and still renders ``FLOAT``."""
        unspecified = sqlalchemy.types.Float()
        self._assert_compiled_value_explicit(unspecified, "FLOAT")

    def test_float_at_single_precision_boundary_remains_float(self):
        """``precision=24`` is the largest value that still fits single precision."""
        at_boundary = sqlalchemy.types.Float(precision=24)
        self._assert_compiled_value_explicit(at_boundary, "FLOAT")

    def test_float_above_single_precision_boundary_promotes_to_double(self):
        """``precision=25`` is the first value past the single-precision limit."""
        just_above = sqlalchemy.types.Float(precision=25)
        self._assert_compiled_value_explicit(just_above, "DOUBLE")

    def test_float_precision_53_promotes_to_double(self):
        """``Float(precision=53)`` is what ``to_sql`` produces for ``float64``."""
        pandas_float64 = sqlalchemy.types.Float(precision=53)
        self._assert_compiled_value_explicit(pandas_float64, "DOUBLE")

    def test_uppercase_float_with_high_precision_stays_float(self):
        """Uppercase ``sqlalchemy.types.FLOAT`` is an explicit request for the
        32-bit backend type, so even a high precision argument must leave it
        rendered as ``FLOAT`` rather than promoting it to ``DOUBLE``.
        """
        explicit_float = sqlalchemy.types.FLOAT(precision=53)
        self._assert_compiled_value_explicit(explicit_float, "FLOAT")

    def test_double_is_unaffected_by_float_compiler(self):
        """``Double`` carries its own ``__visit_name__`` (``'double'``), so the
        ``@compiles(Float)`` hook must leave its rendering alone, with or
        without a precision argument.
        """
        for double_kwargs in ({}, {"precision": 53}):
            self._assert_compiled_value_explicit(
                sqlalchemy.types.Double(**double_kwargs), "DOUBLE"
            )

    def test_create_table_with_float64_emits_double_column(self):
        """Full ``CREATE TABLE`` check for the column pandas builds from ``float64``."""
        from sqlalchemy.schema import CreateTable

        metadata = MetaData()
        value_column = Column("value", sqlalchemy.types.Float(precision=53))
        frame_table = Table("df", metadata, value_column)
        compiled_ddl = CreateTable(frame_table).compile(dialect=self.dialect)
        rendered = str(compiled_ddl)
        assert "value DOUBLE" in rendered
        assert "value FLOAT" not in rendered
232+
233+
174234
class TestDatabricksUUID:
175235
"""Regression coverage for github.com/databricks/databricks-sqlalchemy/issues/50.
176236

0 commit comments

Comments
 (0)