Skip to content

Commit 40d2446

Browse files
committed
fix: convert empty strings to NULL when transforming text columns to integer/float (#488)
1 parent 8f0c06e commit 40d2446

2 files changed

Lines changed: 50 additions & 6 deletions

File tree

sqlite_utils/db.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2004,14 +2004,23 @@ def transform_sql(
20042004
current_column_pairs = list(self.columns_dict.items())
20052005
new_column_pairs = []
20062006
copy_from_to = {column: column for column, _ in current_column_pairs}
2007-
for name, type_ in current_column_pairs:
2008-
type_ = types.get(name) or type_
2007+
# Columns whose type is being changed from text to integer/float; their
2008+
# empty-string values should be copied as NULL (issue #488).
2009+
nullif_empty_columns = set()
2010+
for name, old_type in current_column_pairs:
2011+
type_ = types.get(name) or old_type
20092012
if name in drop:
20102013
del [copy_from_to[name]]
20112014
continue
20122015
new_name = rename.get(name) or name
20132016
new_column_pairs.append((new_name, type_))
20142017
copy_from_to[name] = new_name
2018+
if (
2019+
name in types
2020+
and COLUMN_TYPE_MAPPING.get(old_type) == "TEXT"
2021+
and COLUMN_TYPE_MAPPING.get(type_) in ("INTEGER", "REAL", "FLOAT")
2022+
):
2023+
nullif_empty_columns.add(name)
20152024

20162025
if pk is DEFAULT:
20172026
pks_renamed = tuple(
@@ -2084,10 +2093,16 @@ def transform_sql(
20842093
if "rowid" not in new_cols:
20852094
new_cols.insert(0, "rowid")
20862095
old_cols.insert(0, "rowid")
2096+
select_exprs = []
2097+
for col in old_cols:
2098+
if col in nullif_empty_columns:
2099+
select_exprs.append("NULLIF({}, '')".format(quote_identifier(col)))
2100+
else:
2101+
select_exprs.append(quote_identifier(col))
20872102
copy_sql = "INSERT INTO {} ({new_cols})\n SELECT {old_cols} FROM {};".format(
20882103
quote_identifier(new_table_name),
20892104
quote_identifier(self.name),
2090-
old_cols=", ".join(quote_identifier(col) for col in old_cols),
2105+
old_cols=", ".join(select_exprs),
20912106
new_cols=", ".join(quote_identifier(col) for col in new_cols),
20922107
)
20932108
sqls.append(copy_sql)

tests/test_transform.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
{"types": {"age": int}},
2222
[
2323
'CREATE TABLE "dogs_new_suffix" (\n "id" INTEGER PRIMARY KEY,\n "name" TEXT,\n "age" INTEGER\n);',
24-
'INSERT INTO "dogs_new_suffix" ("rowid", "id", "name", "age")\n SELECT "rowid", "id", "name", "age" FROM "dogs";',
24+
'INSERT INTO "dogs_new_suffix" ("rowid", "id", "name", "age")\n SELECT "rowid", "id", "name", NULLIF("age", \'\') FROM "dogs";',
2525
'DROP TABLE "dogs";',
2626
'ALTER TABLE "dogs_new_suffix" RENAME TO "dogs";',
2727
],
@@ -51,7 +51,7 @@
5151
{"types": {"age": int}, "rename": {"age": "dog_age"}},
5252
[
5353
'CREATE TABLE "dogs_new_suffix" (\n "id" INTEGER PRIMARY KEY,\n "name" TEXT,\n "dog_age" INTEGER\n);',
54-
'INSERT INTO "dogs_new_suffix" ("rowid", "id", "name", "dog_age")\n SELECT "rowid", "id", "name", "age" FROM "dogs";',
54+
'INSERT INTO "dogs_new_suffix" ("rowid", "id", "name", "dog_age")\n SELECT "rowid", "id", "name", NULLIF("age", \'\') FROM "dogs";',
5555
'DROP TABLE "dogs";',
5656
'ALTER TABLE "dogs_new_suffix" RENAME TO "dogs";',
5757
],
@@ -144,7 +144,7 @@ def tracer(sql, params):
144144
{"types": {"age": int}},
145145
[
146146
'CREATE TABLE "dogs_new_suffix" (\n "id" INTEGER,\n "name" TEXT,\n "age" INTEGER\n);',
147-
'INSERT INTO "dogs_new_suffix" ("rowid", "id", "name", "age")\n SELECT "rowid", "id", "name", "age" FROM "dogs";',
147+
'INSERT INTO "dogs_new_suffix" ("rowid", "id", "name", "age")\n SELECT "rowid", "id", "name", NULLIF("age", \'\') FROM "dogs";',
148148
'DROP TABLE "dogs";',
149149
'ALTER TABLE "dogs_new_suffix" RENAME TO "dogs";',
150150
],
@@ -659,3 +659,32 @@ def test_transform_with_unique_constraint_implicit_index(fresh_db):
659659
"You must manually drop this index prior to running this transformation and manually recreate the new index after running this transformation."
660660
in str(excinfo.value)
661661
)
662+
663+
664+
def test_transform_empty_string_to_null_for_numeric_types(fresh_db):
    """Text -> integer/float transforms must store '' as NULL (issue #488).

    Non-empty numeric strings, including "0", must still come back as their
    numeric values rather than being nulled out.
    """
    table = fresh_db["rows"]
    seed_records = [
        {"id": 1, "weight": "12.5", "count": "3"},
        {"id": 2, "weight": "", "count": ""},
        {"id": 3, "weight": "0", "count": "0"},
    ]
    table.insert_all(seed_records, pk="id")

    table.transform(types={"weight": float, "count": int})

    # The schema must reflect the new numeric types...
    expected_schema = {"id": int, "weight": float, "count": int}
    assert table.columns_dict == expected_schema

    # ...and only the empty-string row (id=2) may turn into NULLs.
    expected_records = [
        {"id": 1, "weight": 12.5, "count": 3},
        {"id": 2, "weight": None, "count": None},
        {"id": 3, "weight": 0.0, "count": 0},
    ]
    assert list(table.rows) == expected_records
683+
684+
685+
def test_transform_does_not_nullify_text_columns(fresh_db):
    """Empty strings must be preserved when a column stays text (issue #488)."""
    table = fresh_db["rows"]
    seed_records = [{"id": 1, "name": ""}, {"id": 2, "name": "x"}]
    table.insert_all(seed_records, pk="id")

    # A transform whose target type is still text must copy values unchanged.
    table.transform(types={"name": str})

    expected_records = [{"id": 1, "name": ""}, {"id": 2, "name": "x"}]
    assert list(table.rows) == expected_records

0 commit comments

Comments
 (0)