Skip to content

Commit 041844c

Browse files
authored
Merge branch 'main' into add-databricks-query-tags-session-properties
2 parents 3171010 + 9d2ecea commit 041844c

7 files changed

Lines changed: 116 additions & 1 deletion

File tree

docs/integrations/engines/duckdb.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme
7979
type: ducklake
8080
path: 'catalog.ducklake'
8181
data_path: data/ducklake
82+
override_data_path: true
8283
encrypted: True
8384
data_inlining_row_limit: 10
8485
metadata_schema: main
@@ -105,6 +106,7 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme
105106
type="ducklake",
106107
path="catalog.ducklake",
107108
data_path="data/ducklake",
109+
override_data_path=False,
108110
encrypted=True,
109111
data_inlining_row_limit=10,
110112
metadata_schema="main",
@@ -120,6 +122,7 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme
120122

121123
- `path`: Path to the DuckLake catalog file
122124
- `data_path`: Path where DuckLake data files are stored
125+
- `override_data_path`: Whether to pass DuckLake's `OVERRIDE_DATA_PATH` option when attaching the catalog (default: `False`)
123126
- `encrypted`: Whether to enable encryption for the catalog (default: `False`)
124127
- `data_inlining_row_limit`: Maximum number of rows to inline in the catalog (default: `0`)
125128
- `metadata_schema`: The schema in the catalog server in which to store the DuckLake metadata tables (default: `main`)
@@ -364,6 +367,7 @@ The `filesystems` accepts a list of file systems to register in the DuckDB conne
364367
type: ducklake
365368
path: myducklakecatalog.duckdb
366369
data_path: abfs://MyFabricWorkspace/MyFabricLakehouse.Lakehouse/Files/DuckLake.Files
370+
override_data_path: False
367371
extensions:
368372
- ducklake
369373
filesystems:

sqlmesh/core/config/connection.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ class DuckDBAttachOptions(BaseConfig):
238238

239239
# DuckLake specific options
240240
data_path: t.Optional[str] = None
241+
override_data_path: t.Optional[bool] = False
241242
encrypted: bool = False
242243
data_inlining_row_limit: t.Optional[int] = None
243244
metadata_schema: t.Optional[str] = None
@@ -258,6 +259,8 @@ def to_sql(self, alias: str) -> str:
258259
path = f"ducklake:{path}"
259260
if self.data_path is not None:
260261
options.append(f"DATA_PATH '{self.data_path}'")
262+
if self.override_data_path:
263+
options.append("OVERRIDE_DATA_PATH true")
261264
if self.encrypted:
262265
options.append("ENCRYPTED")
263266
if self.data_inlining_row_limit is not None:
@@ -2097,6 +2100,7 @@ class ClickhouseConnectionConfig(ConnectionConfig):
20972100
https_proxy: t.Optional[str] = None
20982101
server_host_name: t.Optional[str] = None
20992102
tls_mode: t.Optional[str] = None
2103+
secure: bool = False
21002104

21012105
concurrent_tasks: int = 1
21022106
register_comments: bool = True
@@ -2133,6 +2137,7 @@ def _connection_kwargs_keys(self) -> t.Set[str]:
21332137
"https_proxy",
21342138
"server_host_name",
21352139
"tls_mode",
2140+
"secure",
21362141
}
21372142
return kwargs
21382143

sqlmesh/core/dialect.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,8 @@ def format_model_expressions(
774774
if rewrite_casts:
775775

776776
def cast_to_colon(node: exp.Expr) -> exp.Expr:
777-
if isinstance(node, exp.Cast) and not any(
777+
# Directly check type instead of isinstance to avoid rewriting subclasses of CAST, e.g. JSONCast
778+
if type(node) is exp.Cast and not any(
778779
# Only convert CAST into :: if it doesn't have additional args set, otherwise this
779780
# conversion could alter the semantics (eg. changing SAFE_CAST in BigQuery to CAST)
780781
arg

sqlmesh/core/engine_adapter/databricks.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,3 +467,26 @@ def _build_column_defs(
467467
return super()._build_column_defs(
468468
target_columns_to_types, column_descriptions, is_view, materialized
469469
)
470+
471+
def columns(
472+
self, table_name: TableName, include_pseudo_columns: bool = False
473+
) -> t.Dict[str, exp.DataType]:
474+
table = exp.to_table(table_name)
475+
476+
column_catalog = table.catalog or self.get_current_catalog()
477+
query = (
478+
exp.select("columns.column_name", "columns.full_data_type")
479+
.from_("system.information_schema.columns")
480+
.where(
481+
exp.and_(
482+
exp.column("table_name").eq(table.name),
483+
exp.column("table_schema").eq(table.db),
484+
exp.column("table_catalog").eq(column_catalog),
485+
)
486+
)
487+
.order_by("ordinal_position ASC")
488+
)
489+
490+
result = self.cursor.fetchall(query)
491+
492+
return {row[0]: exp.DataType.build(row[1], dialect=self.dialect) for row in result}

tests/core/engine_adapter/test_databricks.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,3 +646,61 @@ def test_drop_data_object_materialized_view_calls_correct_drop(mocker: MockFixtu
646646
drop_view_mock.assert_called_once_with(
647647
mv_data_object.to_table(), ignore_if_not_exists=True, materialized=True
648648
)
649+
650+
651+
def test_columns(mocker: MockFixture, make_mocked_engine_adapter: t.Callable):
652+
adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="test_catalog")
653+
654+
# Override/mock get_current_catalog to return default
655+
current_catalog_mock = mocker.patch.object(
656+
adapter, "get_current_catalog", return_value="test_catalog"
657+
)
658+
# create long struct columns datatype
659+
long_struct_cols = [f"a_{i}:int" for i in range(50)]
660+
adapter.cursor.fetchall.return_value = [
661+
("bigint_col", "bigint"),
662+
("binary_col", "binary"),
663+
("boolean_col", "boolean"),
664+
("date_col", "date"),
665+
("decimal_col", "decimal(38,4)"),
666+
("double_col", "double"),
667+
("float_col", "float"),
668+
("int_col", "int"),
669+
("small_int", "smallint"),
670+
("string_col", "string"),
671+
("timestamp_col", "timestamp"),
672+
("timestamp_ntz_col", "timestamp_ntz"),
673+
("tinyint_col", "tinyint"),
674+
("array_col", "array<int>"),
675+
("simple_struct_col", "struct<a:int,b:string>"),
676+
("long_struct_col", f"struct<{','.join(long_struct_cols)}>"),
677+
]
678+
679+
resp = adapter.columns("test_db.test_table")
680+
assert resp == {
681+
"bigint_col": exp.DataType.build("bigint", dialect=adapter.dialect),
682+
"binary_col": exp.DataType.build("binary", dialect=adapter.dialect),
683+
"boolean_col": exp.DataType.build("boolean", dialect=adapter.dialect),
684+
"date_col": exp.DataType.build("date", dialect=adapter.dialect),
685+
"decimal_col": exp.DataType.build("decimal(38,4)", dialect=adapter.dialect),
686+
"double_col": exp.DataType.build("double", dialect=adapter.dialect),
687+
"float_col": exp.DataType.build("float", dialect=adapter.dialect),
688+
"int_col": exp.DataType.build("int", dialect=adapter.dialect),
689+
"small_int": exp.DataType.build("smallint", dialect=adapter.dialect),
690+
"string_col": exp.DataType.build("string", dialect=adapter.dialect),
691+
"timestamp_col": exp.DataType.build("timestamp", dialect=adapter.dialect),
692+
"timestamp_ntz_col": exp.DataType.build("timestamp_ntz", dialect=adapter.dialect),
693+
"tinyint_col": exp.DataType.build("tinyint", dialect=adapter.dialect),
694+
"array_col": exp.DataType.build("array<int>", dialect=adapter.dialect),
695+
"simple_struct_col": exp.DataType.build("struct<a:int,b:string>", dialect=adapter.dialect),
696+
"long_struct_col": exp.DataType.build(
697+
f"struct<{','.join(long_struct_cols)}>", dialect=adapter.dialect
698+
),
699+
}
700+
701+
adapter.cursor.fetchall.assert_called_once_with(
702+
parse_one(
703+
"""SELECT columns.column_name, columns.full_data_type FROM system.information_schema.columns WHERE table_name = 'test_table' AND table_schema = 'test_db' AND table_catalog = 'test_catalog' ORDER BY ordinal_position ASC""",
704+
dialect="databricks",
705+
)
706+
)

tests/core/test_connection_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,7 @@ def test_duckdb_attach_ducklake_catalog(make_config):
810810
type="ducklake",
811811
path="catalog.ducklake",
812812
data_path="/tmp/ducklake_data",
813+
override_data_path=False,
813814
encrypted=True,
814815
data_inlining_row_limit=10,
815816
),

tests/core/test_dialect.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,29 @@ def test_format_model_expressions():
207207
SAFE_CAST('bla' AS INT64) AS FOO"""
208208
)
209209

210+
x = format_model_expressions(
211+
parse(
212+
"""
213+
MODEL(name a.b, kind FULL, dialect clickhouse);
214+
SELECT data.:String AS foo, CAST(1 AS INT) AS bar
215+
"""
216+
),
217+
dialect="clickhouse",
218+
)
219+
# JSONCast (e.g. `.:` syntax in ClickHouse) must not be rewritten to `::`
220+
assert (
221+
x
222+
== """MODEL (
223+
name a.b,
224+
kind FULL,
225+
dialect clickhouse
226+
);
227+
228+
SELECT
229+
data.:String AS foo,
230+
1::Int32 AS bar"""
231+
)
232+
210233
x = format_model_expressions(
211234
parse(
212235
"""

0 commit comments

Comments
 (0)