Fix: add pseudocolumns for ingestion-time partitioned tables (#1033)

z3z1ma · web-flow · commit 95fde1e57b83 · 2023-06-26T09:08:40.000-07:00
* fix: add pseudocolumns for ingestion-time partitioned tables

* feat: explicit pseudocolumn flag in method interface

* style: run make style

* chore: make pseudocolumn inclusion opt-in
diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py
@@ -462,7 +462,9 @@ def drop_view(self, view_name: TableName, ignore_if_not_exists: bool = True) ->
             exp.Drop(this=exp.to_table(view_name), exists=ignore_if_not_exists, kind="VIEW")
         )
 
-    def columns(self, table_name: TableName) -> t.Dict[str, exp.DataType]:
+    def columns(
+        self, table_name: TableName, include_pseudo_columns: bool = False
+    ) -> t.Dict[str, exp.DataType]:
         """Fetches column names and types for the target table."""
         self.execute(exp.Describe(this=exp.to_table(table_name), kind="TABLE"))
         describe_output = self.cursor.fetchall()
diff --git a/sqlmesh/core/engine_adapter/base_postgres.py b/sqlmesh/core/engine_adapter/base_postgres.py
@@ -21,7 +21,9 @@ class BasePostgresEngineAdapter(EngineAdapter):
     COLUMNS_TABLE = "information_schema.columns"
     SUPPORTS_MATERIALIZED_VIEWS = True
 
-    def columns(self, table_name: TableName) -> t.Dict[str, exp.DataType]:
+    def columns(
+        self, table_name: TableName, include_pseudo_columns: bool = False
+    ) -> t.Dict[str, exp.DataType]:
         """Fetches column names and types for the target table."""
         table = exp.to_table(table_name)
         sql = (
diff --git a/sqlmesh/core/engine_adapter/bigquery.py b/sqlmesh/core/engine_adapter/bigquery.py
@@ -99,13 +99,22 @@ def create_schema(self, schema_name: str, ignore_if_exists: bool = True) -> None
                     return
             raise e
 
-    def columns(self, table_name: TableName) -> t.Dict[str, exp.DataType]:
+    def columns(
+        self, table_name: TableName, include_pseudo_columns: bool = False
+    ) -> t.Dict[str, exp.DataType]:
         """Fetches column names and types for the target table."""
+        from google.cloud.bigquery import TimePartitioningType
+
         table = self._get_table(table_name)
-        return {
+        columns = {
             field.name: exp.DataType.build(field.field_type, dialect=self.dialect)
             for field in table.schema
         }
+        if include_pseudo_columns and table.time_partitioning and not table.time_partitioning.field:
+            columns["_PARTITIONTIME"] = exp.DataType.build("TIMESTAMP")
+            if table.time_partitioning.type_ == TimePartitioningType.DAY:
+                columns["_PARTITIONDATE"] = exp.DataType.build("DATE")
+        return columns
 
     def fetchone(
         self,
diff --git a/sqlmesh/core/schema_loader.py b/sqlmesh/core/schema_loader.py
@@ -54,7 +54,10 @@ def create_schema_file(
                 "columns": {c: t.sql(dialect=dialect) for c, t in columns.items()},
             }
             for table, columns in sorted(
-                pool.map(lambda table: (table, adapter.columns(table)), external_tables)
+                pool.map(
+                    lambda table: (table, adapter.columns(table, include_pseudo_columns=True)),
+                    external_tables,
+                )
             )
         ]
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -54,7 +54,10 @@ def create_schema_file(` |
| `54` | `54` | `"columns": {c: t.sql(dialect=dialect) for c, t in columns.items()},` |
| `55` | `55` | `}` |
| `56` | `56` | `for table, columns in sorted(` |
| `57` | | `- pool.map(lambda table: (table, adapter.columns(table)), external_tables)` |
| | `57` | `+ pool.map(` |
| | `58` | `+ lambda table: (table, adapter.columns(table, include_pseudo_columns=True)),` |
| | `59` | `+ external_tables,` |
| | `60` | `+ )` |
| `58` | `61` | `)` |
| `59` | `62` | `]` |
| `60` | `63` | |