Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion dbt/adapters/sqlserver/sqlserver_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from dbt.adapters.events.types import SchemaCreation
from dbt.adapters.reference_keys import _make_ref_key_dict
from dbt.adapters.sql.impl import CREATE_SCHEMA_MACRO_NAME, SQLAdapter
from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn
from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn, SQLServerColumnNative
from dbt.adapters.sqlserver.sqlserver_configs import SQLServerConfigs
from dbt.adapters.sqlserver.sqlserver_connections import SQLServerConnectionManager
from dbt.adapters.sqlserver.sqlserver_relation import SQLServerRelation
Expand Down Expand Up @@ -48,6 +48,11 @@ class SQLServerAdapter(SQLAdapter):
ConstraintType.foreign_key: ConstraintSupport.ENFORCED,
}

def __init__(self, config, mp_context=None):
    """Initialise the adapter and select the column class for this instance.

    After the base initialiser has run, the
    ``dbt_sqlserver_use_native_string_types`` behaviour flag is consulted.
    When it is truthy, ``Column`` is rebound on this instance to
    ``SQLServerColumnNative``; otherwise the attribute is left untouched.
    """
    super().__init__(config, mp_context)
    native_strings = self.behavior.dbt_sqlserver_use_native_string_types
    if not native_strings:
        # Flag off: keep whatever Column the class already provides.
        return
    self.Column = SQLServerColumnNative

@property
def _behavior_flags(self) -> List[BehaviorFlag]:
return [
Expand All @@ -71,6 +76,17 @@ def _behavior_flags(self) -> List[BehaviorFlag]:
"macro in your project instead."
),
},
{
"name": "dbt_sqlserver_use_native_string_types",
"default": False,
"description": (
"When True, uses SQL Server-native string type mappings: "
"STRING -> VARCHAR(MAX), NCHAR -> NCHAR(1), NVARCHAR -> NVARCHAR(4000). "
"When False (default), preserves legacy mappings: "
"STRING and NVARCHAR -> VARCHAR(8000), NCHAR -> CHAR(1). "
"The new behaviour is intended to become the default in a future release."
),
},
]

@available.parse(lambda *a, **k: [])
Expand Down
17 changes: 17 additions & 0 deletions dbt/adapters/sqlserver/sqlserver_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,20 @@ def can_expand_to(self, other_column: "SQLServerColumn") -> bool:
if not self.is_string() or not other_column.is_string():
return False
return other_column.string_size() > self.string_size()


class SQLServerColumnNative(SQLServerColumn):
    """Column class with SQL Server-native string type labels.

    Differences from the labels inherited from ``SQLServerColumn``:

    * ``STRING``   -> ``VARCHAR(MAX)`` (matches dbt convention)
    * ``NCHAR``    -> ``NCHAR(1)``
    * ``NVARCHAR`` -> ``NVARCHAR(4000)``

    This fixes the legacy default, where the unicode labels were silently
    aliased to non-unicode ``CHAR(1)`` / ``VARCHAR(8000)``. ``NVARCHAR`` uses
    the maximum fixed-length form (4000, the cap for fixed NVARCHAR since
    unicode is two bytes per character), parallel to ``VARCHAR(8000)``.

    Opt-in via the ``dbt_sqlserver_use_native_string_types`` behaviour flag;
    intended to become the default in a future release.
    """

    # Start from the parent's labels and override only the three string
    # entries; every other label is inherited unchanged.
    TYPE_LABELS: ClassVar[Dict[str, str]] = dict(
        SQLServerColumn.TYPE_LABELS,
        STRING="VARCHAR(MAX)",
        NCHAR="NCHAR(1)",
        NVARCHAR="NVARCHAR(4000)",
    )
144 changes: 144 additions & 0 deletions tests/functional/adapter/mssql/test_native_string_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""Functional coverage for the dbt_sqlserver_use_native_string_types behaviour flag (#626).

Default (flag off): preserves the pre-existing mappings
STRING -> VARCHAR(8000), NCHAR -> CHAR(1), NVARCHAR -> VARCHAR(8000).

Flag on: switches to SQL Server-native mappings
STRING -> VARCHAR(MAX), NCHAR -> NCHAR(1), NVARCHAR -> NVARCHAR(4000).

Each path is exercised at two levels:
- dict assertion on adapter.Column.TYPE_LABELS (cheap sanity check),
- end-to-end: a contract-enforced model with `data_type: string|nchar|nvarchar`
is materialised and the resulting column type read back from
sys.columns + sys.types.
"""

import pytest

from dbt.tests.util import run_dbt

# Model under test: materialised as a table, selecting three literal columns
# that are all cast to non-unicode types (varchar / char) in the query itself.
# The column names match the entries declared in contract_model_yml.
contract_model_sql = """
{{ config(materialized='table') }}
select
cast('hello' as varchar(50)) as str_col,
cast('h' as char(1)) as nchar_col,
cast('hello' as varchar(50)) as nvarchar_col
"""

# Properties file for types_model: enforces the model contract and declares
# one column per dbt string label (string / nchar / nvarchar).
# The nesting is significant YAML: `config` and `columns` must sit under the
# `- name: types_model` entry, and each `data_type` under its own column,
# otherwise the contract and column declarations are not attached to the model.
contract_model_yml = """
version: 2
models:
  - name: types_model
    config:
      contract:
        enforced: true
    columns:
      - name: str_col
        data_type: string
      - name: nchar_col
        data_type: nchar
      - name: nvarchar_col
        data_type: nvarchar
"""


def _column_types(project, schema: str, table: str) -> dict:
"""Return {column_name: (data_type, character_maximum_length)} for a table.

Queries sys.columns + sys.types with a three-part OBJECT_ID so we don't
depend on the connection's current-database context.
"""
# sys.columns.max_length is in bytes; for unicode (n*) types it's two bytes
# per character, so we halve to get the declared character length. MAX is
# reported as -1 in both cases.
rows = project.run_sql(
f"""
select c.name, t.name, c.max_length
from [{project.database}].sys.columns c
inner join [{project.database}].sys.types t on c.user_type_id = t.user_type_id
where c.object_id = object_id('[{project.database}].[{schema}].[{table}]')
""",
fetch="all",
)
result = {}
for name, dtype, max_length in rows:
if dtype in ("nchar", "nvarchar", "sysname") and max_length != -1:
char_length = max_length // 2
else:
char_length = max_length
result[name] = (dtype, char_length)
return result


# ---------------------------------------------------------------------------
# Default behaviour — flag absent, mappings unchanged from pre-#626
# ---------------------------------------------------------------------------


class TestDefaultStringTypes:
    """Flag left unset: the legacy string mappings must still be in effect."""

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the contract-enforced model and its schema file."""
        project_files = {}
        project_files["types_model.sql"] = contract_model_sql
        project_files["schema.yml"] = contract_model_yml
        return project_files

    def test_type_labels_dict_default(self, project):
        """Cheap sanity check on the adapter's TYPE_LABELS mapping."""
        type_labels = project.adapter.Column.TYPE_LABELS
        legacy_labels = {
            "STRING": "VARCHAR(8000)",
            "NCHAR": "CHAR(1)",
            "NVARCHAR": "VARCHAR(8000)",
        }
        for label, sql_type in legacy_labels.items():
            assert type_labels[label] == sql_type

    def test_column_types_in_database_default(self, project):
        """Build the model and compare the types stored in the database."""
        run_results = run_dbt(["run"])
        assert len(run_results) == 1
        assert run_results[0].status == "success"

        stored = _column_types(project, project.test_schema, "types_model")
        legacy_columns = {
            # STRING -> VARCHAR(8000)
            "str_col": ("varchar", 8000),
            # NCHAR -> CHAR(1) (non-unicode under legacy)
            "nchar_col": ("char", 1),
            # NVARCHAR -> VARCHAR(8000) (non-unicode under legacy)
            "nvarchar_col": ("varchar", 8000),
        }
        for column, declared in legacy_columns.items():
            assert stored[column] == declared


# ---------------------------------------------------------------------------
# Native behaviour — flag enabled
# ---------------------------------------------------------------------------


class TestNativeStringTypes:
    """Flag switched on: the SQL Server-native string mappings must apply."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Enable the behaviour flag through the project's ``flags`` config."""
        native_flags = {"dbt_sqlserver_use_native_string_types": True}
        return {"flags": native_flags}

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the contract-enforced model and its schema file."""
        project_files = {}
        project_files["types_model.sql"] = contract_model_sql
        project_files["schema.yml"] = contract_model_yml
        return project_files

    def test_type_labels_dict_native(self, project):
        """Cheap sanity check on the adapter's TYPE_LABELS mapping."""
        type_labels = project.adapter.Column.TYPE_LABELS
        native_labels = {
            "STRING": "VARCHAR(MAX)",
            "NCHAR": "NCHAR(1)",
            "NVARCHAR": "NVARCHAR(4000)",
        }
        for label, sql_type in native_labels.items():
            assert type_labels[label] == sql_type

    def test_column_types_in_database_native(self, project):
        """Build the model and compare the types stored in the database."""
        run_results = run_dbt(["run"])
        assert len(run_results) == 1
        assert run_results[0].status == "success"

        stored = _column_types(project, project.test_schema, "types_model")
        native_columns = {
            # STRING -> VARCHAR(MAX), reported as character_maximum_length = -1
            "str_col": ("varchar", -1),
            # NCHAR -> NCHAR(1) (unicode)
            "nchar_col": ("nchar", 1),
            # NVARCHAR -> NVARCHAR(4000) (unicode, max fixed-length)
            "nvarchar_col": ("nvarchar", 4000),
        }
        for column, declared in native_columns.items():
            assert stored[column] == declared