From d1bd626ade456c827e309323a10691f39776921f Mon Sep 17 00:00:00 2001 From: Ben Knight Date: Wed, 20 May 2026 12:37:13 +0000 Subject: [PATCH] Add opt-in native string type mappings via behaviour flag (#626). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces SQLServerColumnNative which maps STRING -> VARCHAR(MAX), NCHAR -> NCHAR(1), and NVARCHAR -> NVARCHAR(4000). The current default (SQLServerColumn) is unchanged: STRING / NVARCHAR -> VARCHAR(8000), NCHAR -> CHAR(1). Users opt into the new mappings by setting `dbt_sqlserver_use_native_string_types` to True in dbt_project.yml; the new behaviour is intended to become the default in a future release. With the flag enabled, this fixes the silent non-unicode aliasing of NCHAR / NVARCHAR present in the default mappings (the aliasing remains the default until the flag is set), and uses NVARCHAR(4000) — the maximum fixed NVARCHAR size — rather than NVARCHAR(8000) which exceeds SQL Server's fixed-length cap. --- dbt/adapters/sqlserver/sqlserver_adapter.py | 18 ++- dbt/adapters/sqlserver/sqlserver_column.py | 17 +++ .../adapter/mssql/test_native_string_types.py | 144 ++++++++++++++++++ 3 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 tests/functional/adapter/mssql/test_native_string_types.py diff --git a/dbt/adapters/sqlserver/sqlserver_adapter.py b/dbt/adapters/sqlserver/sqlserver_adapter.py index 05d2ef72f..894c4336e 100644 --- a/dbt/adapters/sqlserver/sqlserver_adapter.py +++ b/dbt/adapters/sqlserver/sqlserver_adapter.py @@ -18,7 +18,7 @@ from dbt.adapters.events.types import SchemaCreation from dbt.adapters.reference_keys import _make_ref_key_dict from dbt.adapters.sql.impl import CREATE_SCHEMA_MACRO_NAME, SQLAdapter -from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn +from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn, SQLServerColumnNative from dbt.adapters.sqlserver.sqlserver_configs import SQLServerConfigs from dbt.adapters.sqlserver.sqlserver_connections import SQLServerConnectionManager 
from dbt.adapters.sqlserver.sqlserver_relation import SQLServerRelation @@ -48,6 +48,11 @@ class SQLServerAdapter(SQLAdapter): ConstraintType.foreign_key: ConstraintSupport.ENFORCED, } + def __init__(self, config, mp_context=None): + super().__init__(config, mp_context) + if self.behavior.dbt_sqlserver_use_native_string_types: + self.Column = SQLServerColumnNative + @property def _behavior_flags(self) -> List[BehaviorFlag]: return [ @@ -71,6 +76,17 @@ def _behavior_flags(self) -> List[BehaviorFlag]: "macro in your project instead." ), }, + { + "name": "dbt_sqlserver_use_native_string_types", + "default": False, + "description": ( + "When True, uses SQL Server-native string type mappings: " + "STRING -> VARCHAR(MAX), NCHAR -> NCHAR(1), NVARCHAR -> NVARCHAR(4000). " + "When False (default), preserves legacy mappings: " + "STRING and NVARCHAR -> VARCHAR(8000), NCHAR -> CHAR(1). " + "The new behaviour is intended to become the default in a future release." + ), + }, ] @available.parse(lambda *a, **k: []) diff --git a/dbt/adapters/sqlserver/sqlserver_column.py b/dbt/adapters/sqlserver/sqlserver_column.py index de205d5c9..d93281b5f 100644 --- a/dbt/adapters/sqlserver/sqlserver_column.py +++ b/dbt/adapters/sqlserver/sqlserver_column.py @@ -97,3 +97,20 @@ def can_expand_to(self, other_column: "SQLServerColumn") -> bool: if not self.is_string() or not other_column.is_string(): return False return other_column.string_size() > self.string_size() + + +class SQLServerColumnNative(SQLServerColumn): + """STRING maps to VARCHAR(MAX) (matches dbt convention) and NCHAR / NVARCHAR + map to their unicode SQL Server types — fixing the legacy default where + they were silently aliased to non-unicode CHAR(1) / VARCHAR(8000). + NVARCHAR uses the maximum fixed-length form (4000 — the cap for fixed + NVARCHAR since unicode is two bytes per character), parallel to VARCHAR(8000). 
+ Opt-in via the `dbt_sqlserver_use_native_string_types` behaviour flag; + intended to become the default in a future release.""" + + TYPE_LABELS: ClassVar[Dict[str, str]] = { + **SQLServerColumn.TYPE_LABELS, + "STRING": "VARCHAR(MAX)", + "NCHAR": "NCHAR(1)", + "NVARCHAR": "NVARCHAR(4000)", + } diff --git a/tests/functional/adapter/mssql/test_native_string_types.py b/tests/functional/adapter/mssql/test_native_string_types.py new file mode 100644 index 000000000..7c4b28920 --- /dev/null +++ b/tests/functional/adapter/mssql/test_native_string_types.py @@ -0,0 +1,144 @@ +"""Functional coverage for the dbt_sqlserver_use_native_string_types behaviour flag (#626). + +Default (flag off): preserves the pre-existing mappings + STRING -> VARCHAR(8000), NCHAR -> CHAR(1), NVARCHAR -> VARCHAR(8000). + +Flag on: switches to SQL Server-native mappings + STRING -> VARCHAR(MAX), NCHAR -> NCHAR(1), NVARCHAR -> NVARCHAR(4000). + +Each path is exercised at two levels: + - dict assertion on adapter.Column.TYPE_LABELS (cheap sanity check), + - end-to-end: a contract-enforced model with `data_type: string|nchar|nvarchar` + is materialised and the resulting column type read back from + sys.columns + sys.types. +""" + +import pytest + +from dbt.tests.util import run_dbt + +contract_model_sql = """ +{{ config(materialized='table') }} +select + cast('hello' as varchar(50)) as str_col, + cast('h' as char(1)) as nchar_col, + cast('hello' as varchar(50)) as nvarchar_col +""" + +contract_model_yml = """ +version: 2 +models: + - name: types_model + config: + contract: + enforced: true + columns: + - name: str_col + data_type: string + - name: nchar_col + data_type: nchar + - name: nvarchar_col + data_type: nvarchar +""" + + +def _column_types(project, schema: str, table: str) -> dict: + """Return {column_name: (data_type, character_maximum_length)} for a table. + + Queries sys.columns + sys.types with a three-part OBJECT_ID so we don't + depend on the connection's current-database context. 
+ """ + # sys.columns.max_length is in bytes; for unicode (n*) types it's two bytes + # per character, so we halve to get the declared character length. MAX is + # reported as -1 in both cases. + rows = project.run_sql( + f""" + select c.name, t.name, c.max_length + from [{project.database}].sys.columns c + inner join [{project.database}].sys.types t on c.user_type_id = t.user_type_id + where c.object_id = object_id('[{project.database}].[{schema}].[{table}]') + """, + fetch="all", + ) + result = {} + for name, dtype, max_length in rows: + if dtype in ("nchar", "nvarchar", "sysname") and max_length != -1: + char_length = max_length // 2 + else: + char_length = max_length + result[name] = (dtype, char_length) + return result + + +# --------------------------------------------------------------------------- +# Default behaviour — flag absent, mappings unchanged from pre-#626 +# --------------------------------------------------------------------------- + + +class TestDefaultStringTypes: + @pytest.fixture(scope="class") + def models(self): + return { + "types_model.sql": contract_model_sql, + "schema.yml": contract_model_yml, + } + + def test_type_labels_dict_default(self, project): + labels = project.adapter.Column.TYPE_LABELS + assert labels["STRING"] == "VARCHAR(8000)" + assert labels["NCHAR"] == "CHAR(1)" + assert labels["NVARCHAR"] == "VARCHAR(8000)" + + def test_column_types_in_database_default(self, project): + results = run_dbt(["run"]) + assert len(results) == 1 + assert results[0].status == "success" + + types = _column_types(project, project.test_schema, "types_model") + # STRING -> VARCHAR(8000) + assert types["str_col"] == ("varchar", 8000) + # NCHAR -> CHAR(1) (non-unicode under legacy) + assert types["nchar_col"] == ("char", 1) + # NVARCHAR -> VARCHAR(8000) (non-unicode under legacy) + assert types["nvarchar_col"] == ("varchar", 8000) + + +# --------------------------------------------------------------------------- +# Native behaviour — flag enabled +# 
--------------------------------------------------------------------------- + + +class TestNativeStringTypes: + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "dbt_sqlserver_use_native_string_types": True, + } + } + + @pytest.fixture(scope="class") + def models(self): + return { + "types_model.sql": contract_model_sql, + "schema.yml": contract_model_yml, + } + + def test_type_labels_dict_native(self, project): + labels = project.adapter.Column.TYPE_LABELS + assert labels["STRING"] == "VARCHAR(MAX)" + assert labels["NCHAR"] == "NCHAR(1)" + assert labels["NVARCHAR"] == "NVARCHAR(4000)" + + def test_column_types_in_database_native(self, project): + results = run_dbt(["run"]) + assert len(results) == 1 + assert results[0].status == "success" + + types = _column_types(project, project.test_schema, "types_model") + # STRING -> VARCHAR(MAX), reported as character_maximum_length = -1 + assert types["str_col"] == ("varchar", -1) + # NCHAR -> NCHAR(1) (unicode) + assert types["nchar_col"] == ("nchar", 1) + # NVARCHAR -> NVARCHAR(4000) (unicode, max fixed-length) + assert types["nvarchar_col"] == ("nvarchar", 4000)