From 32e91cd78f45a08df63ecbe4692c2896ea484bd1 Mon Sep 17 00:00:00 2001 From: etonlels Date: Tue, 23 Jun 2026 16:45:54 -0600 Subject: [PATCH 1/4] fix(annotate_types): coerce bigquery date function arg types [CLAUDE] In BigQuery these functions return the type of their first argument. They were annotated via `_annotate_by_args("this")` (`DATE_ADD`, `DATE_TRUNC`, `DATETIME_TRUNC`, `TIMESTAMP_TRUNC`), which copies the first arg's type verbatim. For a date/datetime string-literal first arg that type is the literal's `VARCHAR`, so e.g. `DATE_ADD('2020-01-01', INTERVAL 1 DAY)` was annotated as `VARCHAR` instead of `DATE`. `DATE_SUB` was separately annotated via the base `_annotate_timeunit`, which handled the string-literal case but mis-annotated a `TIMESTAMP` first arg as `DATETIME` (e.g. `DATE_SUB(TIMESTAMP '...', INTERVAL 1 HOUR)` -> `DATETIME` instead of `TIMESTAMP`). Route all five through a new bigquery `_annotate_date_func`: a typed first arg keeps its exact type (via `_annotate_by_args`), and a string-literal first arg is coerced to the function's own temporal type (`DATE_ADD`/`DATE_SUB`/`DATE_TRUNC` -> `DATE`, `DATETIME_TRUNC` -> `DATETIME`, `TIMESTAMP_TRUNC` -> `TIMESTAMP`), matching BigQuery's implicit coercion. Verified against BigQuery for the whole date/time arithmetic family; the change is bigquery-only and does not affect other dialects. Co-Authored-By: OpenCode google-vertex/claude-opus-4-8@default --- sqlglot/typing/bigquery.py | 42 ++++++++++-- .../fixtures/optimizer/annotate_functions.sql | 68 +++++++++++++++++++ 2 files changed, 106 insertions(+), 4 deletions(-) diff --git a/sqlglot/typing/bigquery.py b/sqlglot/typing/bigquery.py index 4026d51729..5448dcafab 100644 --- a/sqlglot/typing/bigquery.py +++ b/sqlglot/typing/bigquery.py @@ -9,6 +9,34 @@ from sqlglot.optimizer.annotate_types import TypeAnnotator +# DATE_ADD / DATE_SUB / *_TRUNC return the type of their first argument. 
BigQuery +# implicitly casts a string literal first arg to the function's own temporal type, +# so map each to that type (e.g. DATE_ADD('2020-01-01', ...) -> DATE, +# TIMESTAMP_TRUNC('...') -> TIMESTAMP). +_DATE_FUNC_LITERAL_TYPE: t.Dict[t.Type[exp.Expr], exp.DType] = { + exp.DateAdd: exp.DType.DATE, + exp.DateSub: exp.DType.DATE, + exp.DateTrunc: exp.DType.DATE, + exp.DatetimeTrunc: exp.DType.DATETIME, + exp.TimestampTrunc: exp.DType.TIMESTAMPTZ, +} + + +def _annotate_date_func(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr: + """Annotate DATE_ADD / DATE_SUB / *_TRUNC, which return their first arg's type. + + A typed first argument keeps its exact type (e.g. DATE_ADD(DATETIME, ...) -> + DATETIME). For a string literal first argument, BigQuery implicitly casts it to + the function's own temporal type, so the result is that type (e.g. + DATE_ADD('2020-01-01', INTERVAL 1 DAY) -> DATE). + """ + this = expression.this + if isinstance(this, exp.Literal) and this.is_string: + return self._set_type(expression, _DATE_FUNC_LITERAL_TYPE[type(expression)]) + + return self._annotate_by_args(expression, "this") + + def _annotate_math_functions(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr: """ Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention: @@ -175,9 +203,6 @@ def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array: for expr_type in { exp.ArgMax, exp.ArgMin, - exp.DateAdd, - exp.DateTrunc, - exp.DatetimeTrunc, exp.GroupConcat, exp.IgnoreNulls, exp.JSONExtract, @@ -197,12 +222,21 @@ def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array: exp.SafeNegate, exp.Sign, exp.Substring, - exp.TimestampTrunc, exp.Translate, exp.Trim, exp.Upper, } }, + **{ + expr_type: {"annotator": lambda self, e: _annotate_date_func(self, e)} + for expr_type in { + exp.DateAdd, + exp.DateSub, + exp.DateTrunc, + exp.DatetimeTrunc, + exp.TimestampTrunc, + } + }, **{ expr_type: {"returns": exp.DType.BIGINT} 
for expr_type in { diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql index f8a9b9482d..f54ef2397a 100644 --- a/tests/fixtures/optimizer/annotate_functions.sql +++ b/tests/fixtures/optimizer/annotate_functions.sql @@ -2448,6 +2448,74 @@ DATETIME; DATE_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 30 MINUTE); DATETIME; +# dialect: bigquery +DATE_ADD('2008-12-25', INTERVAL 5 DAY); +DATE; + +# dialect: bigquery +DATE_TRUNC('2008-12-25', MONTH); +DATE; + +# dialect: bigquery +DATETIME_TRUNC('2008-12-25', DAY); +DATETIME; + +# dialect: bigquery +DATETIME_TRUNC('2008-12-25 15:30:00', DAY); +DATETIME; + +# dialect: bigquery +TIMESTAMP_TRUNC('2008-12-25 15:30:00', DAY); +TIMESTAMP; + +# dialect: bigquery +TIMESTAMP_TRUNC('2008-12-25', DAY); +TIMESTAMP; + +# dialect: bigquery +DATE_SUB('2008-12-25', INTERVAL 1 MONTH); +DATE; + +# dialect: bigquery +DATE_SUB(DATE '2008-12-25', INTERVAL 1 MONTH); +DATE; + +# dialect: bigquery +DATE_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 1 DAY); +DATETIME; + +# dialect: bigquery +DATE_SUB(TIMESTAMP '2008-12-25 15:30:00', INTERVAL 1 HOUR); +TIMESTAMP; + +# dialect: bigquery +DATETIME_ADD('2008-12-25 15:30:00', INTERVAL 1 DAY); +DATETIME; + +# dialect: bigquery +DATETIME_SUB('2008-12-25 15:30:00', INTERVAL 1 DAY); +DATETIME; + +# dialect: bigquery +TIMESTAMP_ADD('2008-12-25 15:30:00', INTERVAL 1 HOUR); +TIMESTAMP; + +# dialect: bigquery +TIMESTAMP_SUB('2008-12-25 15:30:00', INTERVAL 1 HOUR); +TIMESTAMP; + +# dialect: bigquery +TIME_ADD('08:50:48', INTERVAL 1 HOUR); +TIME; + +# dialect: bigquery +TIME_SUB('08:50:48', INTERVAL 1 HOUR); +TIME; + +# dialect: bigquery +TIME_TRUNC('08:50:48', HOUR); +TIME; + # dialect: bigquery UNIX_DATE(tbl.date_col); BIGINT; From 686ada22a84ce0a6c84897b4d8d44be02895e127 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Wed, 24 Jun 2026 13:04:46 +0300 Subject: [PATCH 2/4] Update sqlglot/typing/bigquery.py --- 
sqlglot/typing/bigquery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlglot/typing/bigquery.py b/sqlglot/typing/bigquery.py index 5448dcafab..ba30dc9032 100644 --- a/sqlglot/typing/bigquery.py +++ b/sqlglot/typing/bigquery.py @@ -13,7 +13,7 @@ # implicitly casts a string literal first arg to the function's own temporal type, # so map each to that type (e.g. DATE_ADD('2020-01-01', ...) -> DATE, # TIMESTAMP_TRUNC('...') -> TIMESTAMP). -_DATE_FUNC_LITERAL_TYPE: t.Dict[t.Type[exp.Expr], exp.DType] = { +_DATE_FUNC_LITERAL_TYPE: dict[type[exp.Expr], exp.DType] = { exp.DateAdd: exp.DType.DATE, exp.DateSub: exp.DType.DATE, exp.DateTrunc: exp.DType.DATE, From 104df44189f242dfc7cf1f343c875ccec0f403d4 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Wed, 24 Jun 2026 13:04:52 +0300 Subject: [PATCH 3/4] Update sqlglot/typing/bigquery.py --- sqlglot/typing/bigquery.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sqlglot/typing/bigquery.py b/sqlglot/typing/bigquery.py index ba30dc9032..bd7f00abce 100644 --- a/sqlglot/typing/bigquery.py +++ b/sqlglot/typing/bigquery.py @@ -31,6 +31,8 @@ def _annotate_date_func(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr: DATE_ADD('2020-01-01', INTERVAL 1 DAY) -> DATE). 
""" this = expression.this + + # BigQuery only coerces string literals here: DATE_ADD(c, ...) with a STRING-typed c is rejected, so only a literal first argument needs this special case if isinstance(this, exp.Literal) and this.is_string: return self._set_type(expression, _DATE_FUNC_LITERAL_TYPE[type(expression)]) From d0e2bcf3a953726008c7f2227a1e647d96149463 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Wed, 24 Jun 2026 13:04:59 +0300 Subject: [PATCH 4/4] Update sqlglot/typing/bigquery.py --- sqlglot/typing/bigquery.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sqlglot/typing/bigquery.py b/sqlglot/typing/bigquery.py index bd7f00abce..abf77581e6 100644 --- a/sqlglot/typing/bigquery.py +++ b/sqlglot/typing/bigquery.py @@ -231,13 +231,7 @@ def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array: }, **{ expr_type: {"annotator": lambda self, e: _annotate_date_func(self, e)} - for expr_type in { - exp.DateAdd, - exp.DateSub, - exp.DateTrunc, - exp.DatetimeTrunc, - exp.TimestampTrunc, - } + for expr_type in _DATE_FUNC_LITERAL_TYPE }, **{ expr_type: {"returns": exp.DType.BIGINT}