Skip to content

Commit 8ae394b

Browse files
azurechen97 and claude committed
fix: model TO_DATE return type — DATE without format, DATETIME with format
TO_DATE(str) returns DATE → maps to TsOrDsToDate (unchanged). TO_DATE(str, fmt) returns DATETIME → maps to StrToTime so cross-dialect consumers see a datetime type; the generator emits TO_DATE(str, fmt) for MaxCompute.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 68e5807 commit 8ae394b

3 files changed

Lines changed: 31 additions & 7 deletions

File tree

src/sqlglot_maxcompute/generator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ class MaxComputeGenerator(Hive.Generator):
7373
exp.Variance: rename_func("VAR_SAMP"),
7474
# String position: MaxCompute uses INSTR(str, substr), not LOCATE(substr, str)
7575
exp.StrPosition: lambda self, e: self.func("INSTR", e.this, e.args.get("substr")),
76+
# TO_DATE(str, fmt) returns DATETIME — modeled as StrToTime; emit TO_DATE in MaxCompute
77+
exp.StrToTime: lambda self, e: self.func("TO_DATE", e.this, e.args.get("format")),
7678
}
7779

7880
def _dateadd_sql(

src/sqlglot_maxcompute/parser.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,13 @@ class MaxComputeParser(Hive.Parser):
7373
"DATE_FORMAT": lambda args: exp.TimeToStr(
7474
this=seq_get(args, 0), format=seq_get(args, 1)
7575
),
76-
# Hive override: MaxCompute TO_DATE accepts date types directly (no TimeStrToTime wrap)
77-
"TO_DATE": lambda args: exp.TsOrDsToDate(
78-
this=seq_get(args, 0), format=seq_get(args, 1)
76+
# Hive override: TO_DATE return type depends on args:
77+
# TO_DATE(str) → DATE → TsOrDsToDate (no format)
78+
# TO_DATE(str, fmt) → DATETIME → StrToTime (format present)
79+
"TO_DATE": lambda args: (
80+
exp.StrToTime(this=seq_get(args, 0), format=seq_get(args, 1))
81+
if seq_get(args, 1) is not None
82+
else exp.TsOrDsToDate(this=seq_get(args, 0))
7983
),
8084
# Hive override: MaxCompute FROM_UNIXTIME takes 1 arg and returns DATETIME, not STRING
8185
"FROM_UNIXTIME": lambda args: exp.UnixToTime(this=seq_get(args, 0)),

tests/test_maxcompute.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,29 @@ def test_date_conversion(self):
241241
},
242242
)
243243

244-
# TO_DATE: parses without Hive's TimeStrToTime wrapping (format stored as-is)
245-
expr = self.parse_one("TO_DATE('2024-01-01', 'yyyy-mm-dd')")
244+
# TO_DATE without format → DATE (TsOrDsToDate)
245+
expr = self.parse_one("TO_DATE('2024-01-01')")
246246
self.assertIsInstance(expr, exp.TsOrDsToDate)
247-
# Format should be stored as Oracle style, not strftime
248-
self.assertEqual(expr.args.get("format").this, "yyyy-mm-dd")
247+
self.assertIsNone(expr.args.get("format"))
248+
self.validate_all(
249+
"TO_DATE('2024-01-01')",
250+
write={
251+
"maxcompute": "TO_DATE('2024-01-01')",
252+
"spark": "TO_DATE('2024-01-01')",
253+
},
254+
)
255+
256+
# TO_DATE with format → DATETIME (StrToTime); format stored as MaxCompute style, not strftime
257+
expr = self.parse_one("TO_DATE('20240101', 'yyyymmdd')")
258+
self.assertIsInstance(expr, exp.StrToTime)
259+
self.assertEqual(expr.args.get("format").this, "yyyymmdd")
260+
self.validate_all(
261+
"TO_DATE('20240101', 'yyyymmdd')",
262+
write={
263+
"maxcompute": "TO_DATE('20240101', 'yyyymmdd')",
264+
"spark": "TO_TIMESTAMP('20240101', 'yyyymmdd')",
265+
},
266+
)
249267

250268
# TO_CHAR (untyped arg → ToChar)
251269
self.assertIsInstance(self.parse_one("TO_CHAR(dt, 'yyyy-mm-dd')"), exp.ToChar)

0 commit comments

Comments
 (0)