From 8987e9c790fe8da7921b2b4fbd6240671cf6b835 Mon Sep 17 00:00:00 2001
From: Shubham Dhal
Date: Tue, 21 Apr 2026 17:25:47 +0530
Subject: [PATCH 1/2] test: add UTF-8 round-trip regression test for dbt seed

Seeds a CSV containing Arabic, Greek, Chinese, emoji, and accented Latin,
then asserts each cell round-trips byte-exact through the seed path.
Covers both V1 and V2 materialization.

Guards the fix for #332, which reported non-ASCII characters being
mangled on seed in 1.4.x. The current parameterized-insert path
(adapter.add_query + bindings) handles UTF-8 correctly; this test locks
that in.

Co-authored-by: Isaac
---
 .../adapter/simple_seed/test_utf8_seed.py     | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 tests/functional/adapter/simple_seed/test_utf8_seed.py

diff --git a/tests/functional/adapter/simple_seed/test_utf8_seed.py b/tests/functional/adapter/simple_seed/test_utf8_seed.py
new file mode 100644
index 000000000..c0059bbd2
--- /dev/null
+++ b/tests/functional/adapter/simple_seed/test_utf8_seed.py
@@ -0,0 +1,32 @@
+import pytest
+from dbt.tests import util
+
+from tests.functional.adapter.fixtures import MaterializationV2Mixin
+
+
+UTF8_DATA = {
+    "arabic": "مرحبا بالعالم",
+    "greek": "Γειά σου Κόσμε",
+    "chinese": "你好世界",
+    "emoji": "Hello 🌍 World ✨",
+    "mixed": "café naïve résumé — ",
+}
+SEED_CSV = "label,value\n" + "\n".join(f"{k},{v}" for k, v in UTF8_DATA.items())
+
+
+class TestUtf8SeedRoundTrip:
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"utf8_roundtrip.csv": SEED_CSV}
+
+    def test_utf8_roundtrip(self, project):
+        results = util.run_dbt(["seed"])
+        assert len(results) == 1
+
+        relation = util.relation_from_name(project.adapter, "utf8_roundtrip")
+        rows = project.run_sql(f"select label, value from {relation}", fetch="all")
+        assert dict(rows) == UTF8_DATA
+
+
+class TestUtf8SeedRoundTripV2(TestUtf8SeedRoundTrip, MaterializationV2Mixin):
+    pass

From 37bf26801c62b44ac2a33be1c74c9982b3df1d4c Mon Sep 17 00:00:00 2001
From: Shubham Dhal
Date: Tue, 21 Apr 2026 17:48:07 +0530
Subject: [PATCH 2/2] style: ruff import block formatting

Co-authored-by: Isaac
---
 tests/functional/adapter/simple_seed/test_utf8_seed.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/functional/adapter/simple_seed/test_utf8_seed.py b/tests/functional/adapter/simple_seed/test_utf8_seed.py
index c0059bbd2..cd75d71d3 100644
--- a/tests/functional/adapter/simple_seed/test_utf8_seed.py
+++ b/tests/functional/adapter/simple_seed/test_utf8_seed.py
@@ -3,7 +3,6 @@
 
 from tests.functional.adapter.fixtures import MaterializationV2Mixin
 
-
 UTF8_DATA = {
     "arabic": "مرحبا بالعالم",
     "greek": "Γειά σου Κόσμε",