Skip to content

Commit feb0612

Browse files
authored
Merge pull request #671 from Benjamin-Knight/feat/#626-string-type-defaults
Add opt-in native string type mappings via behaviour flag (#626).
2 parents c46173b + d1bd626 commit feb0612

3 files changed

Lines changed: 178 additions & 1 deletion

File tree

dbt/adapters/sqlserver/sqlserver_adapter.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from dbt.adapters.events.types import SchemaCreation
1919
from dbt.adapters.reference_keys import _make_ref_key_dict
2020
from dbt.adapters.sql.impl import CREATE_SCHEMA_MACRO_NAME, SQLAdapter
21-
from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn
21+
from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn, SQLServerColumnNative
2222
from dbt.adapters.sqlserver.sqlserver_configs import SQLServerConfigs
2323
from dbt.adapters.sqlserver.sqlserver_connections import SQLServerConnectionManager
2424
from dbt.adapters.sqlserver.sqlserver_relation import SQLServerRelation
@@ -48,6 +48,11 @@ class SQLServerAdapter(SQLAdapter):
4848
ConstraintType.foreign_key: ConstraintSupport.ENFORCED,
4949
}
5050

51+
def __init__(self, config, mp_context=None):
    """Build the adapter and choose the column class from the behaviour flag.

    The base adapter is initialised first; afterwards, when the
    `dbt_sqlserver_use_native_string_types` behaviour flag evaluates truthy,
    this instance's `Column` attribute is pointed at `SQLServerColumnNative`.
    Otherwise `Column` is left untouched.
    """
    super().__init__(config, mp_context)
    # NOTE(review): truth-testing a dbt behaviour flag may emit the
    # behaviour-change warning while the flag is off — confirm that firing
    # it on every adapter construction is intended.
    use_native_strings = self.behavior.dbt_sqlserver_use_native_string_types
    if use_native_strings:
        # Instance-level override: only this adapter object is affected.
        self.Column = SQLServerColumnNative
55+
5156
@property
5257
def _behavior_flags(self) -> List[BehaviorFlag]:
5358
return [
@@ -71,6 +76,17 @@ def _behavior_flags(self) -> List[BehaviorFlag]:
7176
"macro in your project instead."
7277
),
7378
},
79+
{
80+
"name": "dbt_sqlserver_use_native_string_types",
81+
"default": False,
82+
"description": (
83+
"When True, uses SQL Server-native string type mappings: "
84+
"STRING -> VARCHAR(MAX), NCHAR -> NCHAR(1), NVARCHAR -> NVARCHAR(4000). "
85+
"When False (default), preserves legacy mappings: "
86+
"STRING and NVARCHAR -> VARCHAR(8000), NCHAR -> CHAR(1). "
87+
"The new behaviour is intended to become the default in a future release."
88+
),
89+
},
7490
]
7591

7692
@available.parse(lambda *a, **k: [])

dbt/adapters/sqlserver/sqlserver_column.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,20 @@ def can_expand_to(self, other_column: "SQLServerColumn") -> bool:
9797
if not self.is_string() or not other_column.is_string():
9898
return False
9999
return other_column.string_size() > self.string_size()
100+
101+
102+
class SQLServerColumnNative(SQLServerColumn):
    """Column class using SQL Server-native string type labels.

    Inherits every label from `SQLServerColumn.TYPE_LABELS` and overrides
    three of them:

    * STRING   -> VARCHAR(MAX)   (matches the dbt convention)
    * NCHAR    -> NCHAR(1)       (unicode, instead of legacy CHAR(1))
    * NVARCHAR -> NVARCHAR(4000) (unicode, instead of legacy VARCHAR(8000))

    4000 is the largest fixed length NVARCHAR accepts, because unicode takes
    two bytes per character; it is the counterpart of VARCHAR(8000).

    Opt-in via the `dbt_sqlserver_use_native_string_types` behaviour flag;
    intended to become the default in a future release.
    """

    # Start from the parent's labels, then apply the native overrides; keys
    # already present keep their position and only their value changes.
    TYPE_LABELS: ClassVar[Dict[str, str]] = dict(
        SQLServerColumn.TYPE_LABELS,
        STRING="VARCHAR(MAX)",
        NCHAR="NCHAR(1)",
        NVARCHAR="NVARCHAR(4000)",
    )
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
"""Functional coverage for the dbt_sqlserver_use_native_string_types behaviour flag (#626).
2+
3+
Default (flag off): preserves the pre-existing mappings
4+
STRING -> VARCHAR(8000), NCHAR -> CHAR(1), NVARCHAR -> VARCHAR(8000).
5+
6+
Flag on: switches to SQL Server-native mappings
7+
STRING -> VARCHAR(MAX), NCHAR -> NCHAR(1), NVARCHAR -> NVARCHAR(4000).
8+
9+
Each path is exercised at two levels:
10+
- dict assertion on adapter.Column.TYPE_LABELS (cheap sanity check),
11+
- end-to-end: a contract-enforced model with `data_type: string|nchar|nvarchar`
12+
is materialised and the resulting column type read back from
13+
sys.columns + sys.types.
14+
"""
15+
16+
import pytest
17+
18+
from dbt.tests.util import run_dbt
19+
20+
contract_model_sql = """
21+
{{ config(materialized='table') }}
22+
select
23+
cast('hello' as varchar(50)) as str_col,
24+
cast('h' as char(1)) as nchar_col,
25+
cast('hello' as varchar(50)) as nvarchar_col
26+
"""
27+
28+
contract_model_yml = """
29+
version: 2
30+
models:
31+
- name: types_model
32+
config:
33+
contract:
34+
enforced: true
35+
columns:
36+
- name: str_col
37+
data_type: string
38+
- name: nchar_col
39+
data_type: nchar
40+
- name: nvarchar_col
41+
data_type: nvarchar
42+
"""
43+
44+
45+
def _column_types(project, schema: str, table: str) -> dict:
    """Map each column of a table to its `(type_name, declared_length)` pair.

    The lookup goes through sys.columns + sys.types and resolves the table
    with a three-part OBJECT_ID, so the result does not depend on the
    connection's current-database context.

    `declared_length` is derived from `sys.columns.max_length`, which is
    expressed in bytes. Unicode (n*) types take two bytes per character, so
    their value is halved to recover the declared character length. MAX
    columns are reported as -1 and are passed through unchanged.
    """
    fetched_rows = project.run_sql(
        f"""
        select c.name, t.name, c.max_length
        from [{project.database}].sys.columns c
        inner join [{project.database}].sys.types t on c.user_type_id = t.user_type_id
        where c.object_id = object_id('[{project.database}].[{schema}].[{table}]')
        """,
        fetch="all",
    )
    # Type names whose max_length counts two bytes per character.
    two_byte_types = ("nchar", "nvarchar", "sysname")
    return {
        column_name: (
            type_name,
            byte_length // 2
            if type_name in two_byte_types and byte_length != -1
            else byte_length,
        )
        for column_name, type_name, byte_length in fetched_rows
    }
71+
72+
73+
# ---------------------------------------------------------------------------
74+
# Default behaviour — flag absent, mappings unchanged from pre-#626
75+
# ---------------------------------------------------------------------------
76+
77+
78+
class TestDefaultStringTypes:
    """With no flag override, the legacy string mappings must stay in force."""

    @pytest.fixture(scope="class")
    def models(self):
        # The contract-enforced model and the schema file declaring its types.
        project_models = {}
        project_models["types_model.sql"] = contract_model_sql
        project_models["schema.yml"] = contract_model_yml
        return project_models

    def test_type_labels_dict_default(self, project):
        # Cheap sanity check on the label table the adapter exposes.
        type_labels = project.adapter.Column.TYPE_LABELS
        legacy_labels = {
            "STRING": "VARCHAR(8000)",
            "NCHAR": "CHAR(1)",
            "NVARCHAR": "VARCHAR(8000)",
        }
        for alias, sql_type in legacy_labels.items():
            assert type_labels[alias] == sql_type

    def test_column_types_in_database_default(self, project):
        # End-to-end: materialise the model, then read the column types back.
        run_results = run_dbt(["run"])
        assert len(run_results) == 1
        assert run_results[0].status == "success"

        observed = _column_types(project, project.test_schema, "types_model")
        legacy_columns = {
            # STRING -> VARCHAR(8000)
            "str_col": ("varchar", 8000),
            # NCHAR -> CHAR(1) (non-unicode under legacy)
            "nchar_col": ("char", 1),
            # NVARCHAR -> VARCHAR(8000) (non-unicode under legacy)
            "nvarchar_col": ("varchar", 8000),
        }
        for column_name, type_and_length in legacy_columns.items():
            assert observed[column_name] == type_and_length
104+
105+
106+
# ---------------------------------------------------------------------------
107+
# Native behaviour — flag enabled
108+
# ---------------------------------------------------------------------------
109+
110+
111+
class TestNativeStringTypes:
    """With the flag switched on, the SQL Server-native mappings must apply."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        # Opt in to the new behaviour through the project's flags section.
        enabled_flags = {"dbt_sqlserver_use_native_string_types": True}
        return {"flags": enabled_flags}

    @pytest.fixture(scope="class")
    def models(self):
        # The contract-enforced model and the schema file declaring its types.
        project_models = {}
        project_models["types_model.sql"] = contract_model_sql
        project_models["schema.yml"] = contract_model_yml
        return project_models

    def test_type_labels_dict_native(self, project):
        # Cheap sanity check on the label table the adapter exposes.
        type_labels = project.adapter.Column.TYPE_LABELS
        native_labels = {
            "STRING": "VARCHAR(MAX)",
            "NCHAR": "NCHAR(1)",
            "NVARCHAR": "NVARCHAR(4000)",
        }
        for alias, sql_type in native_labels.items():
            assert type_labels[alias] == sql_type

    def test_column_types_in_database_native(self, project):
        # End-to-end: materialise the model, then read the column types back.
        run_results = run_dbt(["run"])
        assert len(run_results) == 1
        assert run_results[0].status == "success"

        observed = _column_types(project, project.test_schema, "types_model")
        native_columns = {
            # STRING -> VARCHAR(MAX); sys.columns.max_length reports MAX as -1
            "str_col": ("varchar", -1),
            # NCHAR -> NCHAR(1) (unicode)
            "nchar_col": ("nchar", 1),
            # NVARCHAR -> NVARCHAR(4000) (unicode, max fixed-length)
            "nvarchar_col": ("nvarchar", 4000),
        }
        for column_name, type_and_length in native_columns.items():
            assert observed[column_name] == type_and_length

0 commit comments

Comments
 (0)