cre-dev · martinv13 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "xml2db"
-version = "0.13.2"
+version = "0.13.3"
 authors = [
   { name="Commission de régulation de l'énergie", email="opensource@cre.fr" },
 ]

diff --git a/src/xml2db/dialect/duckdb.py b/src/xml2db/dialect/duckdb.py
@@ -158,7 +158,7 @@ def bulk_insert(self, conn: Any, table: Any, records: list) -> None:
             sql = text(
                 f"INSERT INTO {full_name} ({insert_cols}) "
                 f"SELECT {select_exprs} "
-                f"FROM read_csv('{safe_path}', header=true, nullstr='', all_varchar=true)"
+                f"FROM read_csv('{safe_path}', header=true, nullstr='', all_varchar=true, quote='\"', escape='\"')"
             )
             conn.execute(sql)
         finally:

diff --git a/tests/test_bulk_insert.py b/tests/test_bulk_insert.py
@@ -168,6 +168,27 @@ def test_duckdb_bulk_insert_scalar_column_default(duckdb_engine):
     assert rows[1]["flag"] is False
 
 
+def test_duckdb_bulk_insert_quoted_csv_field_after_large_unquoted_sample(duckdb_engine):
+    """Regression: DuckDB's CSV sniffer uses only the first ~20k rows as a sample.
+
+    If all sampled rows are unquoted, the sniffer sets quote=(empty), causing a
+    column-count error when it later hits a row whose cell value contains a comma
+    (making csv.writer emit a quoted field).  read_csv must therefore always be
+    given explicit quote='"' and escape='"' options, bypassing auto-detection.
+    """
+    table = _make_table(duckdb_engine, "quoted_field_test")
+    # 'vals' value that contains a comma — document.py's 'join' transform can produce
+    # strings like '"val,ue",other' which csv.writer then wraps in outer quotes,
+    # yielding a quoted CSV cell.
+    problematic_value = '"val,ue",other_value'
+    records = [
+        {"id": i, "label": "simple"} for i in range(25_000)  # exceeds sniffer sample
+    ] + [{"id": 25_000, "label": problematic_value}]
+    rows = _roundtrip(duckdb_engine, table, records)
+    assert len(rows) == 25_001
+    assert rows[-1]["label"] == problematic_value
+
+
 def test_duckdb_bulk_insert_empty(duckdb_engine):
     table = _make_table(duckdb_engine, "empty_test")
     dialect = DuckDBDialect()