Skip to content
This repository was archived by the owner on Mar 16, 2026. It is now read-only.

Commit 89548bc

Browse files
committed
Updates processing_time_partitioning() method
1 parent 73eb01f commit 89548bc

File tree

1 file changed

+25
-45
lines changed

1 file changed

+25
-45
lines changed

sqlalchemy_bigquery/base.py

Lines changed: 25 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -843,51 +843,31 @@ def _process_time_partitioning(
843843
* DATE column
844844
"""
845845

846-
sqltypes = {
847-
# column_type | truncation func OR default value | partitioning_period(s)
848-
"_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period
849-
"_PARTITIONTIME": ("DATE", None), # trunc_fn, no period
850-
"DATE": {
851-
"no_period": (None, None), # date_column, no trunc_fn, no period
852-
"period": (
853-
"DATE_TRUNC",
854-
{"MONTH", "YEAR"},
855-
), # date_column, trunc_fn, period(s)
856-
},
857-
"DATETIME": {
858-
"no_period": ("DATE", None), # datetime_column, trunc_fn, no period
859-
"period": (
860-
"DATETIME_TRUNC",
861-
{"DAY", "HOUR", "MONTH", "YEAR"},
862-
), # datetime_column, trunc_fn, period(s)
863-
},
864-
"TIMESTAMP": {
865-
"no_period": ("DATE", None), # timestamp_column, trunc_fn, no period
866-
"period": (
867-
"TIMESTAMP_TRUNC",
868-
{"DAY", "HOUR", "MONTH", "YEAR"},
869-
), # timestamp_column, trunc_fn, period(s)
870-
},
871-
}
872-
873-
# Extract field (i.e <column_name> or _PARTITIONDATE)
874-
# AND extract the name of the column_type (i.e. "TIMESTAMP", "DATE",
846+
# Extract field if given (i.e. <column_name>) or _PARTITIONDATE if not given
847+
# AND extract the name of the column_type (i.e. is it a "TIMESTAMP", "DATE",
875848
# "DATETIME", "_PARTITIONDATE")
849+
# Also extract the time_partitioning.type_ (i.e. the truncation granularity:
850+
# HOUR, DAY, MONTH, YEAR)
876851
if time_partitioning.field is not None:
877852
field = time_partitioning.field
878853
column_type = table.columns[field].type.__visit_name__.upper()
879854

855+
# Extract time_partitioning.type_ (DAY, HOUR, MONTH, YEAR)
856+
# i.e. generates one partition per type (1/DAY, 1/HOUR)
857+
# NOTE: if time_partitioning.type_ == None, the python-bigquery library
858+
# will eventually overwrite it with a default of DAY.
859+
partitioning_period = time_partitioning.type_
860+
880861
else:
862+
# If no field is given, default to "_PARTITIONDATE" as the
863+
# field to partition on. In addition, to normalize the processing in
864+
# the remainder of this function, set column_type and partitioning_period
865+
# as shown below.
881866
field = "_PARTITIONDATE"
882867
column_type = "_PARTITIONDATE"
868+
partitioning_period = None
883869

884-
# Extract time_partitioning.type_ (DAY, HOUR, MONTH, YEAR)
885-
# i.e. generates one partition per type (1/DAY, 1/HOUR)
886-
# NOTE: if time_partitioning.type_ == None, it gets
887-
# immediately overwritten by python-bigquery to a default of DAY.
888-
partitioning_period = time_partitioning.type_
889-
890-
# TODO: move dict outside the function or to top of function
870+
# TODO: move this dictionary outside the function or to top of function
891871
sqltypes_w_no_partitioning_period = {
892872
# Keys are columns, values are functions
893873
"_PARTITIONDATE": None,
@@ -897,32 +877,32 @@ def _process_time_partitioning(
897877
"TIMESTAMP": "DATE", # <date function>
898878
}
899879

900-
# TODO: move dict outside the function or to top of function
880+
# TODO: move this dictionary outside the function or to top of function
901881
sqltypes_w_partitioning_period = {
902882
# Keys are columns, values are (functions, {allowed_partitioning_periods})
883+
#"_PARTITIONDATE": ("_PARTITIONDATE", {}),
903884
"DATE": ("DATE_TRUNC", {"MONTH", "YEAR"}),
904885
"DATETIME": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}),
905886
"TIMESTAMP": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}),
906887
}
907888

908-
# Extract the default value or truncation_function (i.e. DATE_TRUNC())
909-
# and the set of allowable partition_periods
889+
# Extract truncation_function (i.e. DATE_TRUNC()) or a default value if
890+
# a truncation_function is not used (i.e. for _PARTITIONDATE)
891+
# Also extract the set of allowable partition_periods
910892
# that can be used in that function
911893
if partitioning_period is None:
912-
# do stuff via swnpp
913894
function = sqltypes_w_no_partitioning_period[column_type]
914895
else:
915-
# do different stuff via swpp
916-
function, allowed_partitions = sqltypes_w_partitioning_period[column_type, partitioning_period]
896+
function, allowed_partitions = sqltypes_w_partitioning_period[column_type]
917897

918898
# Create output:
919-
# Special Case: _PARTITIONDATE does NOT use a function or partitioning_period
899+
# Special Case 1: _PARTITIONDATE does NOT use a function or partitioning_period
920900
if function is None:
921901
return f"PARTITION BY {field}"
922902

923-
# Special Case: BigQuery will not accept DAY as partitioning_period for
903+
# Special Case 2: BigQuery will not accept DAY as partitioning_period for
924904
# DATE_TRUNC.
925-
# However, the default argument in python-bigquery for TimePartioning
905+
# However, the default argument in python-bigquery for TimePartitioning
926906
# is DAY. This case overwrites that to avoid making a breaking change in
927907
# python-bigquery.
928908
# https://github.com/googleapis/python-bigquery/blob/a4d9534a900f13ae7355904cda05097d781f27e3/google/cloud/bigquery/table.py#L2916

0 commit comments

Comments
 (0)