diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..1c86f41 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,46 @@ +# Changelog + +## [0.2.0] - 2026-03-31 + +### Added + +**Generator (MaxCompute SQL output)** + +- `DATEADD` for `DateAdd`; `DATEADD` with negated delta for `DateSub` (e.g. BigQuery `DATE_SUB` → `DATEADD(dt, -3, 'DAY')`) +- `DATEDIFF` with optional unit argument +- `DATETRUNC` / `TRUNC_TIME` with full week-unit support (`'week'` → `'week(monday)'`) +- `DATEPART` via named `extract_sql` method +- `GETDATE()` for `CurrentTimestamp`, `NOW()` for `CurrentDatetime` +- `TOLOWER` / `TOUPPER` for `Lower` / `Upper` +- `FROM_JSON` for `ParseJSON`, `GET_USER_ID()` for `CurrentUser`, `TO_MILLIS` for `UnixMillis` +- `APPROX_DISTINCT`, `ARG_MAX`, `ARG_MIN` +- `WM_CONCAT(sep, col)` via named `groupconcat_sql` method (with default separator guard) +- `TO_CHAR` via named `tochar_sql` method +- Type mapping: `VARCHAR` / `CHAR` / `TEXT` → `STRING`; `DATETIME` preserved + +**Parser (read direction)** + +- Statistical aggregates: `STDDEV_SAMP`, `COVAR_POP`, `COVAR_SAMP`, `CORR`, `MEDIAN`, `PERCENTILE_APPROX`, `BITWISE_AND_AGG`, `BITWISE_OR_AGG`, `BITWISE_XOR_AGG` +- `MAX_BY` / `MIN_BY` as aliases for `ARG_MAX` / `ARG_MIN` + +### Fixed + +- `TO_DATE` format string preserved as-is (Hive was converting Oracle-style `mm` → `%M`) +- `WEEKDAY` round-trips as `WEEKDAY(dt)` in MaxCompute output (other dialects still get the `(DAYOFWEEK(dt) + 5) % 7` arithmetic) +- `DATETRUNC` week-unit emits valid `'week(monday)'` string literal instead of bare `WEEK(MONDAY)` +- `groupconcat_sql` defaults separator to `','` when absent (prevents invalid `WM_CONCAT(col)`) +- Python >=3.9 compatibility (removed `ipykernel`, relaxed `pytest` floor) +- Added `sqlglot<31` upper bound to prevent unexpected breakage on major releases + +### Infrastructure + +- GitHub Actions CI across Python 3.9 / 3.10 / 3.11 / 3.12 +- README with usage examples and function coverage table + +## [0.1.0] - 2026-03-01 + 
+Initial release. + +- MaxCompute dialect registered via Python entry points +- Parser: ~40 date/time, string, array, and map functions +- DDL: `LIFECYCLE`, `RANGE CLUSTERED BY`, `AUTO PARTITIONED BY` diff --git a/pyproject.toml b/pyproject.toml index b2e9856..ce6066c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sqlglot-maxcompute" -version = "0.1.0" +version = "0.2.0" description = "MaxCompute dialect plugin for SQLGlot" readme = "README.md" license = { text = "MIT" } diff --git a/src/sqlglot_maxcompute/maxcompute.py b/src/sqlglot_maxcompute/maxcompute.py index d0ce16f..398fd9a 100644 --- a/src/sqlglot_maxcompute/maxcompute.py +++ b/src/sqlglot_maxcompute/maxcompute.py @@ -150,6 +150,9 @@ class Parser(Hive.Parser): "SECOND": exp.Second.from_arg_list, "QUARTER": exp.Quarter.from_arg_list, "WEEKDAY": lambda args: exp.paren(exp.DayOfWeek(this=seq_get(args, 0)) + 5, copy=False) % 7, + # MAX_BY / MIN_BY are Spark/Trino names for the same semantics + "MAX_BY": exp.ArgMax.from_arg_list, + "MIN_BY": exp.ArgMin.from_arg_list, "WEEKOFYEAR": exp.WeekOfYear.from_arg_list, # Last/next day "LAST_DAY": exp.LastDay.from_arg_list, @@ -332,6 +335,21 @@ def groupconcat_sql(self, expression: exp.GroupConcat) -> str: def tochar_sql(self, expression: exp.ToChar) -> str: return self.func("TO_CHAR", expression.this, expression.args.get("format")) + def mod_sql(self, expression: exp.Mod) -> str: + # Reverse the WEEKDAY parser transform: (DAYOFWEEK(x) + 5) % 7 → WEEKDAY(x) + rhs = expression.expression + lhs = expression.this + if ( + isinstance(rhs, exp.Literal) and rhs.this == "7" + and isinstance(lhs, exp.Paren) + and isinstance(lhs.this, exp.Add) + and isinstance(lhs.this.this, exp.DayOfWeek) + and isinstance(lhs.this.expression, exp.Literal) + and lhs.this.expression.this == "5" + ): + return self.func("WEEKDAY", lhs.this.this.this) + return super().mod_sql(expression) + def extract_sql(self, expression: exp.Extract) -> str: # Named extract_sql 
(public) so sqlglot's auto-dispatch picks it up for exp.Extract nodes. unit = expression.this diff --git a/tests/test_maxcompute.py b/tests/test_maxcompute.py index 126c857..ed70cd6 100644 --- a/tests/test_maxcompute.py +++ b/tests/test_maxcompute.py @@ -98,7 +98,7 @@ def test_date_extraction(self): }, ) - # WEEKDAY → (DAYOFWEEK(dt) + 5) % 7 + # WEEKDAY → (DAYOFWEEK(dt) + 5) % 7 in other dialects, but round-trips as WEEKDAY in MaxCompute expr = self.parse_one("WEEKDAY(dt)") self.assertIsInstance(expr, exp.Mod) self.validate_all( @@ -106,6 +106,7 @@ def test_date_extraction(self): write={ "spark": "(DAYOFWEEK(dt) + 5) % 7", "duckdb": "(DAYOFWEEK(dt) + 5) % 7", + "maxcompute": "WEEKDAY(dt)", }, ) @@ -666,7 +667,7 @@ def test_aggregate_roundtrip(self): # APPROX_DISTINCT round-trip self.validate_identity("SELECT APPROX_DISTINCT(x)") - # Cross-dialect: MAX_BY → ARG_MAX, MIN_BY → ARG_MIN + # Cross-dialect: MAX_BY → ARG_MAX, MIN_BY → ARG_MIN (read from Spark) self.validate_all( "SELECT MAX_BY(x, y)", read={"spark": "SELECT MAX_BY(x, y)"}, @@ -678,6 +679,16 @@ def test_aggregate_roundtrip(self): write={"maxcompute": "SELECT ARG_MIN(x, y)"}, ) + # Read MaxCompute MAX_BY/MIN_BY directly (aliases in MaxCompute parser) + self.validate_all( + "SELECT MAX_BY(x, y)", + write={"maxcompute": "SELECT ARG_MAX(x, y)"}, + ) + self.validate_all( + "SELECT MIN_BY(x, y)", + write={"maxcompute": "SELECT ARG_MIN(x, y)"}, + ) + def test_misc_roundtrip(self): # FROM_JSON round-trip self.validate_identity("SELECT FROM_JSON(s, 'schema')") diff --git a/uv.lock b/uv.lock index cc66ffc..92c4496 100644 --- a/uv.lock +++ b/uv.lock @@ -131,7 +131,7 @@ wheels = [ [[package]] name = "sqlglot-maxcompute" -version = "0.1.0" +version = "0.2.0" source = { editable = "." 
} dependencies = [ { name = "sqlglot" }, @@ -144,7 +144,7 @@ dev = [ ] [package.metadata] -requires-dist = [{ name = "sqlglot", specifier = ">=29.0.0" }] +requires-dist = [{ name = "sqlglot", specifier = ">=29.0.0,<31" }] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=7.0" }]