Skip to content

Commit 01c17c5

Browse files
committed
fix(sqlalchemy-spanner): add TOKENLIST type and filter HIDDEN columns in introspection
Spanner full-text search uses TOKENLIST columns (always HIDDEN and generated), but the dialect does not recognize this type — _type_map has no entry and _designate_type() crashes with KeyError on any unrecognized type string.

This change:

1. Adds a first-class TOKENLIST type (TypeEngine subclass) with forward mapping (_type_map), inverse mapping (_type_map_inv), and DDL compilation (SpannerTypeCompiler.visit_TOKENLIST). This ensures schema introspection can roundtrip correctly through Alembic and other DDL tools.
2. Adds a fallback in _designate_type() so unrecognized types return NullType with a warning instead of crashing. This future-proofs against new Spanner types.
3. Filters HIDDEN columns in get_multi_columns() by checking INFORMATION_SCHEMA.COLUMNS.IS_HIDDEN. HIDDEN columns are excluded from SELECT *, cannot be written to, and are infrastructure-only (e.g. TOKENLIST columns backing search indexes). They should not appear in reflected models.

The Go client had the same gap and fixed it in googleapis/google-cloud-go#11522 (released in spanner v1.78.0), which added TOKENLIST as a first-class TypeBase and HIDDEN as a property of ColumnDef.

Fixes #16621
1 parent c3bd6c0 commit 01c17c5

File tree

2 files changed

+112
-1
lines changed

2 files changed

+112
-1
lines changed

packages/sqlalchemy-spanner/google/cloud/sqlalchemy_spanner/sqlalchemy_spanner.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
import base64
15+
import warnings
1516

1617
import re
1718

@@ -107,6 +108,21 @@ def process(value):
107108
return process
108109

109110

111+
class TOKENLIST(types.TypeEngine):
    """Cloud Spanner ``TOKENLIST`` column type (full-text search).

    Spanner produces TOKENLIST values from tokenizer functions such as
    TOKENIZE_FULLTEXT() and uses them to back SEARCH INDEX structures.
    Such columns are always generated and always HIDDEN; applications can
    neither read them nor write to them directly.

    The type is defined here so that a reflected schema keeps its type
    information when it is turned back into DDL (reflect → DDL generation).
    """

    # Name the type compiler dispatches on (``visit_TOKENLIST``).
    __visit_name__ = "TOKENLIST"
125+
110126
# Spanner-to-SQLAlchemy types map
111127
_type_map = {
112128
"BOOL": types.Boolean,
@@ -122,6 +138,7 @@ def process(value):
122138
"TIMESTAMP": types.TIMESTAMP,
123139
"ARRAY": types.ARRAY,
124140
"JSON": types.JSON,
141+
"TOKENLIST": TOKENLIST,
125142
}
126143

127144

@@ -140,6 +157,7 @@ def process(value):
140157
types.TIMESTAMP: "TIMESTAMP",
141158
types.Integer: "INT64",
142159
types.NullType: "INT64",
160+
TOKENLIST: "TOKENLIST",
143161
}
144162

145163
_compound_keywords = {
@@ -819,6 +837,9 @@ def visit_BIGINT(self, type_, **kw):
819837
def visit_JSON(self, type_, **kw):
    """Return the DDL type name emitted for a JSON column."""
    ddl_name = "JSON"
    return ddl_name
821839

840+
def visit_TOKENLIST(self, type_, **kw):
    """Return the DDL type name emitted for a TOKENLIST column."""
    ddl_name = "TOKENLIST"
    return ddl_name
842+
822843

823844
class SpannerDialect(DefaultDialect):
824845
"""Cloud Spanner dialect.
@@ -1145,6 +1166,7 @@ def get_multi_columns(
11451166
{table_type_query}
11461167
{schema_filter_query}
11471168
col.table_catalog = ''
1169+
AND (col.is_hidden != 'TRUE' OR col.is_hidden IS NULL)
11481170
ORDER BY
11491171
col.table_catalog,
11501172
col.table_schema,
@@ -1226,7 +1248,14 @@ def _designate_type(self, str_repr):
12261248
inner_type = self._designate_type(inner_type_str)
12271249
return _type_map["ARRAY"](inner_type)
12281250
else:
1229-
return _type_map[str_repr]
1251+
try:
1252+
return _type_map[str_repr]
1253+
except KeyError:
1254+
warnings.warn(
1255+
"Did not recognize Spanner type '%s', "
1256+
"mapping it to NullType" % str_repr
1257+
)
1258+
return types.NullType()
12301259

12311260
@engine_to_connection
12321261
def get_multi_indexes(
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import warnings
16+
17+
from sqlalchemy import types
18+
from sqlalchemy.testing.plugin.plugin_base import fixtures
19+
20+
from google.cloud.sqlalchemy_spanner import sqlalchemy_spanner
21+
22+
23+
class TestDesignateType(fixtures.TestBase):
    """Unit tests for SpannerDialect._designate_type.

    The dialect's fallback branch returns the raw ``_type_map`` entry, which
    is a type *class* for parameterless types (BOOL, INT64, TOKENLIST, ...),
    whereas sized types, ARRAY and the NullType fallback come back as
    *instances*. SQLAlchemy accepts both forms for a column type, so the
    assertions below accept both as well instead of using a bare
    ``isinstance`` check that can never be true for a class.
    """

    def setup_method(self):
        """Create a fresh dialect for every test."""
        self.dialect = sqlalchemy_spanner.SpannerDialect()

    @staticmethod
    def _is_type(result, expected):
        """Tell whether ``result`` denotes the SQLAlchemy type ``expected``.

        Args:
            result: Value returned by ``_designate_type``; either a type
                class or a type instance.
            expected: The SQLAlchemy type class the result should denote.

        Returns:
            bool: True if ``result`` is ``expected`` (or a subclass of it),
            or an instance of it.
        """
        if isinstance(result, type):
            return issubclass(result, expected)
        return isinstance(result, expected)

    def test_known_types(self):
        expectations = (
            ("BOOL", types.Boolean),
            ("INT64", types.BIGINT),
            ("FLOAT64", types.Float),
            ("DATE", types.DATE),
            ("TIMESTAMP", types.TIMESTAMP),
            ("JSON", types.JSON),
        )
        for spanner_type, expected in expectations:
            result = self.dialect._designate_type(spanner_type)
            # The message names the offending Spanner type on failure.
            assert self._is_type(result, expected), spanner_type

    def test_string_with_length(self):
        result = self.dialect._designate_type("STRING(255)")
        assert isinstance(result, types.String)
        assert result.length == 255

    def test_bytes_with_length(self):
        result = self.dialect._designate_type("BYTES(1024)")
        assert isinstance(result, types.LargeBinary)
        assert result.length == 1024

    def test_tokenlist_returns_tokenlist_type(self):
        result = self.dialect._designate_type("TOKENLIST")
        assert self._is_type(result, sqlalchemy_spanner.TOKENLIST)

    def test_unknown_type_returns_nulltype_with_warning(self):
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            result = self.dialect._designate_type("SOME_FUTURE_TYPE")
        assert self._is_type(result, types.NullType)
        # Count only the warning under test so that unrelated warnings
        # raised during the call cannot break the assertion.
        matching = [w for w in caught if "SOME_FUTURE_TYPE" in str(w.message)]
        assert len(matching) == 1

    def test_array_of_known_type(self):
        result = self.dialect._designate_type("ARRAY<INT64>")
        assert self._is_type(result, types.ARRAY)
62+
63+
64+
class TestTokenlistType(fixtures.TestBase):
    """Check that TOKENLIST is wired into the dialect as a first-class type."""

    def test_in_type_map(self):
        forward_map = sqlalchemy_spanner._type_map
        assert "TOKENLIST" in forward_map
        assert forward_map["TOKENLIST"] is sqlalchemy_spanner.TOKENLIST

    def test_in_inverse_type_map(self):
        inverse_map = sqlalchemy_spanner._type_map_inv
        tokenlist_cls = sqlalchemy_spanner.TOKENLIST
        assert tokenlist_cls in inverse_map
        assert inverse_map[tokenlist_cls] == "TOKENLIST"

    def test_type_compiler_roundtrip(self):
        dialect = sqlalchemy_spanner.SpannerDialect()
        type_compiler = sqlalchemy_spanner.SpannerTypeCompiler(dialect)
        rendered = type_compiler.process(sqlalchemy_spanner.TOKENLIST())
        assert rendered == "TOKENLIST"

    def test_is_type_engine(self):
        tokenlist_cls = sqlalchemy_spanner.TOKENLIST
        assert issubclass(tokenlist_cls, types.TypeEngine)

0 commit comments

Comments
 (0)