Skip to content

Commit 75aba3d

Browse files
azurechen97 and claude committed
test: comprehensive coverage — math, string, window, aggregate, conditional, casting, DDL, DML
Adds 8 new test methods (39 tests total, up from 31):
- test_math_functions: ABS/CEIL/FLOOR/ROUND/SQRT, trig, log, bitwise, constants, RAND, WIDTH_BUCKET
- test_string_functions_extended: LENGTH/CHR/TRIM/CONCAT/REPLACE/TRANSLATE/SOUNDEX/MD5/SHA2/BASE64/URL_ENCODE/REGEXP_EXTRACT/REGEXP_COUNT, plus LOCATE→INSTR and DECODE→CASE WHEN cross-dialect
- test_window_functions: ROW_NUMBER/RANK/DENSE_RANK/NTILE/PERCENT_RANK/CUME_DIST, LAG/LEAD/FIRST_VALUE/LAST_VALUE/NTH_VALUE, aggregate windows with ROWS/RANGE frames
- test_aggregate_functions_extended: PERCENTILE_CONT/DISC (WITHIN GROUP), MAP_AGG, STR_TO_MAP
- test_conditional_and_misc: IF cross-dialect, NULLIF, COALESCE, HASH, UUID
- test_type_casting: all numeric/string/date/complex types, VARCHAR/CHAR→STRING collapsing
- test_ddl_extended: column+table COMMENT, IF NOT EXISTS, STORED AS PARQUET/ORC
- test_dml: WHERE/GROUP BY/HAVING/ORDER BY/LIMIT, JOIN types, subquery, set ops, BETWEEN, LIKE, RLIKE

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent d55378b commit 75aba3d

1 file changed

Lines changed: 320 additions & 0 deletions

File tree

tests/test_maxcompute.py

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,326 @@ def test_inherited_json_functions(self):
966966
self.validate_identity("SELECT GET_JSON_OBJECT(s, '$.key')")
967967
self.validate_identity("SELECT JSON_TUPLE(s, 'k1', 'k2')")
968968

969+
# -------------------------------------------------------------------------
970+
# Math functions (comprehensive)
971+
# -------------------------------------------------------------------------
972+
973+
def test_math_functions(self):
    """Check arithmetic, log/exp, trigonometric, bitwise and misc math calls.

    Most statements are handed to ``validate_identity`` on their own.  The
    few that are expected to come back rewritten (POW, single-argument LOG,
    LOG10) pass the expected SQL as a second argument.  ``validate_all``
    pins the SQL expected for other target dialects.
    """
    identity = self.validate_identity

    # Absolute value, rounding and roots.
    for sql in (
        "SELECT ABS(-5)",
        "SELECT CEIL(1.5)",
        "SELECT FLOOR(1.9)",
        "SELECT ROUND(3.14)",
        "SELECT ROUND(3.14159, 2)",
        "SELECT SQRT(4)",
    ):
        identity(sql)

    # POW is expected to be emitted under its SQL-standard name POWER.
    identity("SELECT POW(2, 10)", "SELECT POWER(2, 10)")

    # Exponential and natural logarithm.
    for sql in ("SELECT EXP(1)", "SELECT LN(1.0)"):
        identity(sql)

    # One-argument LOG is expected as LN; LOG10(x) as the two-argument LOG(10, x).
    identity("SELECT LOG(1.0)", "SELECT LN(1.0)")
    identity("SELECT LOG10(100)", "SELECT LOG(10, 100)")
    self.validate_all(
        "SELECT LOG10(100)",
        write=dict.fromkeys(("maxcompute", "spark", "hive"), "SELECT LOG(10, 100)"),
    )

    unchanged = (
        # Trigonometry and angle conversion.
        "SELECT SIN(0)",
        "SELECT COS(0)",
        "SELECT TAN(0)",
        "SELECT ASIN(0)",
        "SELECT ACOS(1)",
        "SELECT ATAN(1)",
        "SELECT ATAN2(1, 1)",
        "SELECT DEGREES(3.14159)",
        "SELECT RADIANS(180)",
        # Constants.
        "SELECT PI()",
        "SELECT E()",
        # Sign handling.
        "SELECT SIGN(-5)",
        "SELECT NEGATIVE(-3)",
        "SELECT POSITIVE(3)",
        # Base conversion and bit shifts.
        "SELECT BIN(10)",
        "SELECT HEX(255)",
        "SELECT UNHEX('FF')",
        "SELECT CONV('FF', 16, 10)",
        "SELECT SHIFTLEFT(1, 3)",
        "SELECT SHIFTRIGHT(8, 2)",
        # Miscellaneous.
        "SELECT RAND()",
        "SELECT RAND(42)",
        "SELECT ISNAN(1.0)",
        "SELECT WIDTH_BUCKET(5, 0, 10, 5)",
    )
    for sql in unchanged:
        identity(sql)

    # GREATEST / LEAST are expected to be emitted verbatim for every listed dialect.
    for sql in ("SELECT GREATEST(a, b, c)", "SELECT LEAST(a, b, c)"):
        self.validate_all(
            sql,
            write=dict.fromkeys(("maxcompute", "spark", "duckdb"), sql),
        )
1033+
1034+
# -------------------------------------------------------------------------
1035+
# String functions (extended)
1036+
# -------------------------------------------------------------------------
1037+
1038+
def test_string_functions_extended(self):
    """Check string helpers beyond those in test_inherited_string_functions.

    Covers length, character, trim, concatenation, manipulation, hashing and
    encoding calls via ``validate_identity``, then uses ``validate_all`` for
    the rewrites LOCATE -> INSTR, NVL -> COALESCE and DECODE -> CASE WHEN,
    and for REGEXP_EXTRACT across dialects.
    """
    identity = self.validate_identity

    unchanged = (
        # Length.
        "SELECT LENGTH('hello')",
        "SELECT LENGTHB('hello')",
        # Character codes.
        "SELECT ASCII('A')",
        "SELECT CHR(65)",
        # Two-sided trim (LTRIM/RTRIM are exercised by the inherited test).
        "SELECT TRIM(' hello ')",
        # Concatenation (CONCAT_WS is exercised by the inherited test).
        "SELECT CONCAT('a', 'b', 'c')",
        # Manipulation.
        "SELECT REPLACE('hello world', 'world', 'there')",
        "SELECT TRANSLATE('hello', 'el', 'ip')",
        "SELECT SOUNDEX('hello')",
        # Hashing and encoding.
        "SELECT MD5('hello')",
        "SELECT SHA2('hello', 256)",
        "SELECT BASE64('hello')",
        "SELECT URL_ENCODE('hello world')",
        "SELECT URL_DECODE('hello+world')",
        "SELECT CRC32('hello')",
    )
    for sql in unchanged:
        identity(sql)

    # LOCATE(substr, str) is expected as INSTR(str, substr): the arguments swap.
    self.validate_all(
        "LOCATE('lo', 'hello')",
        write={"maxcompute": "INSTR('hello', 'lo')"},
    )

    # REGEXP_EXTRACT and REGEXP_COUNT (REGEXP_REPLACE / REGEXP_EXTRACT_ALL
    # are exercised by the inherited test).
    regexp_extract = "SELECT REGEXP_EXTRACT('hello123', '[0-9]+', 0)"
    identity(regexp_extract)
    identity("SELECT REGEXP_COUNT('hello123world456', '[0-9]+')")
    self.validate_all(
        regexp_extract,
        write=dict.fromkeys(("maxcompute", "spark", "hive"), regexp_extract),
    )

    # NVL is expected to be normalized to COALESCE.
    self.validate_all(
        "NVL(a, 'default')",
        write=dict.fromkeys(("maxcompute", "spark"), "COALESCE(a, 'default')"),
    )

    # Oracle-style DECODE is expected to expand into a searched CASE expression.
    decode_as_case = "CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END"
    self.validate_all(
        "DECODE(x, 1, 'one', 2, 'two', 'other')",
        write=dict.fromkeys(("maxcompute", "hive"), decode_as_case),
    )
1090+
1091+
# -------------------------------------------------------------------------
1092+
# Window functions
1093+
# -------------------------------------------------------------------------
1094+
1095+
def test_window_functions(self):
    """Check ranking, navigation and aggregate window functions, with frames.

    Every statement is first passed to ``validate_identity``; a subset is
    then passed to ``validate_all`` with the same SQL expected for each
    listed target dialect.
    """
    unchanged = (
        # Ranking.
        "SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t",
        "SELECT RANK() OVER (ORDER BY b DESC) FROM t",
        "SELECT DENSE_RANK() OVER (PARTITION BY a ORDER BY b) FROM t",
        "SELECT NTILE(4) OVER (ORDER BY b) FROM t",
        "SELECT PERCENT_RANK() OVER (ORDER BY b) FROM t",
        "SELECT CUME_DIST() OVER (ORDER BY b) FROM t",
        # Navigation.
        "SELECT LAG(col, 1, 0) OVER (ORDER BY b) FROM t",
        "SELECT LEAD(col, 1, NULL) OVER (ORDER BY b) FROM t",
        "SELECT FIRST_VALUE(col) OVER (ORDER BY b) FROM t",
        "SELECT LAST_VALUE(col) OVER (ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t",
        "SELECT NTH_VALUE(col, 2) OVER (ORDER BY b) FROM t",
        # Aggregates over a window, with and without explicit frames.
        "SELECT SUM(x) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) FROM t",
        "SELECT AVG(x) OVER (PARTITION BY a) FROM t",
        "SELECT COUNT(*) OVER (PARTITION BY a ORDER BY b RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t",
        "SELECT MAX(x) OVER (PARTITION BY a ORDER BY b) FROM t",
        "SELECT MIN(x) OVER (PARTITION BY a ORDER BY b) FROM t",
    )
    for sql in unchanged:
        self.validate_identity(sql)

    # Cross-dialect: each statement is expected verbatim for every listed target.
    portable = (
        (
            "SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t",
            ("maxcompute", "spark", "hive"),
        ),
        (
            "SELECT LAG(col, 1, 0) OVER (PARTITION BY a ORDER BY b) FROM t",
            ("maxcompute", "spark"),
        ),
        (
            "SELECT SUM(x) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t",
            ("maxcompute", "spark"),
        ),
    )
    for sql, dialects in portable:
        self.validate_all(sql, write=dict.fromkeys(dialects, sql))
1145+
1146+
# -------------------------------------------------------------------------
1147+
# Aggregate functions (extended)
1148+
# -------------------------------------------------------------------------
1149+
1150+
def test_aggregate_functions_extended(self):
    """Check aggregates not covered by the statistical / inherited aggregate tests.

    Exercises PERCENTILE_CONT / PERCENTILE_DISC with WITHIN GROUP, MAP_AGG
    and STR_TO_MAP.
    """
    identity = self.validate_identity

    # Ordered-set aggregates using the WITHIN GROUP syntax.
    for sql in (
        "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x)",
        "SELECT PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY x)",
    ):
        identity(sql)

    median_salary = "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary)"
    self.validate_all(
        median_salary,
        write={
            "maxcompute": median_salary,
            # The Spark expectation is the PERCENTILE_APPROX form.
            "spark": "SELECT PERCENTILE_APPROX(salary, 0.5)",
        },
    )

    # Map aggregation.
    map_agg = "SELECT MAP_AGG(k, v)"
    identity(map_agg)
    self.validate_all(map_agg, write=dict.fromkeys(("maxcompute", "spark"), map_agg))

    # STR_TO_MAP with explicit pair and key/value delimiters.
    identity("SELECT STR_TO_MAP('a:1,b:2', ',', ':')")
1170+
1171+
# -------------------------------------------------------------------------
1172+
# Conditional and misc functions
1173+
# -------------------------------------------------------------------------
1174+
1175+
def test_conditional_and_misc(self):
    """Check IF, NULLIF, COALESCE, HASH, UUID and GET_USER_ID expressions."""
    identity = self.validate_identity

    # IF is kept as-is for MaxCompute and Spark; the Postgres expectation
    # is a CASE WHEN expression.
    identity("IF(a > 1, 'yes', 'no')")
    if_expr = "IF(a > 1, 1, 0)"
    self.validate_all(
        if_expr,
        write={
            "maxcompute": if_expr,
            "spark": if_expr,
            "postgres": "CASE WHEN a > 1 THEN 1 ELSE 0 END",
        },
    )

    # NULLIF / COALESCE.
    for sql in ("NULLIF(a, 0)", "COALESCE(a, b, 'default')"):
        identity(sql)
    coalesce = "COALESCE(a, b)"
    self.validate_all(
        coalesce,
        write=dict.fromkeys(("maxcompute", "spark", "postgres"), coalesce),
    )

    # Miscellaneous zero- and one-argument functions.
    for sql in ("HASH(a)", "UUID()", "GET_USER_ID()"):
        identity(sql)
1198+
1199+
# -------------------------------------------------------------------------
1200+
# Type casting
1201+
# -------------------------------------------------------------------------
1202+
1203+
def test_type_casting(self):
    """Check CAST to scalar, string, date/time and complex MaxCompute types."""
    identity = self.validate_identity

    # Numeric, boolean and binary scalar types, then the canonical STRING type.
    for sql in (
        "CAST(x AS BIGINT)",
        "CAST(x AS INT)",
        "CAST(x AS SMALLINT)",
        "CAST(x AS TINYINT)",
        "CAST(x AS FLOAT)",
        "CAST(x AS DOUBLE)",
        "CAST(x AS DECIMAL(10, 2))",
        "CAST(x AS BOOLEAN)",
        "CAST(x AS BINARY)",
        "CAST(x AS STRING)",
    ):
        identity(sql)

    # Sized VARCHAR / CHAR are expected to collapse to STRING.
    for sized in ("CAST(x AS VARCHAR(100))", "CAST(x AS CHAR(10))"):
        identity(sized, "CAST(x AS STRING)")

    for sql in (
        # Date / time.
        "CAST(x AS DATE)",
        "CAST(x AS DATETIME)",
        "CAST(x AS TIMESTAMP)",
        # Complex types.
        "CAST(x AS ARRAY<STRING>)",
        "CAST(x AS MAP<STRING, BIGINT>)",
    ):
        identity(sql)

    # Cross-dialect: STRING is expected unchanged for the Hive-family targets.
    to_string = "CAST(x AS STRING)"
    self.validate_all(
        to_string,
        write=dict.fromkeys(("maxcompute", "spark", "hive"), to_string),
    )
1231+
1232+
# -------------------------------------------------------------------------
1233+
# DDL (extended)
1234+
# -------------------------------------------------------------------------
1235+
1236+
def test_ddl_extended(self):
    """Check CREATE TABLE with comments, IF NOT EXISTS and STORED AS clauses."""
    identity = self.validate_identity

    # Column-level and table-level COMMENT, then IF NOT EXISTS.
    for ddl in (
        "CREATE TABLE t (id BIGINT COMMENT 'primary key', name STRING) COMMENT 'user table'",
        "CREATE TABLE IF NOT EXISTS t (id BIGINT)",
    ):
        identity(ddl)

    # STORED AS <format>: the expected output carries only the format name,
    # without the STORED AS keyword.
    stored_as_cases = (
        ("CREATE TABLE t (id BIGINT) STORED AS PARQUET", "CREATE TABLE t (id BIGINT) PARQUET"),
        ("CREATE TABLE t (id BIGINT) STORED AS ORC", "CREATE TABLE t (id BIGINT) ORC"),
    )
    for ddl, expected in stored_as_cases:
        identity(ddl, expected)
1253+
1254+
# -------------------------------------------------------------------------
1255+
# DML patterns
1256+
# -------------------------------------------------------------------------
1257+
1258+
def test_dml(self):
    """Check core query shapes: clauses, joins, subqueries, set ops, predicates.

    Each entry is the positional argument tuple for ``validate_identity``:
    a single SQL string, or an (input, expected output) pair where the
    generated SQL is expected to differ from the input.
    """
    cases = (
        # Basic clauses.
        ("SELECT a, b FROM t WHERE a > 1",),
        ("SELECT a, COUNT(*) FROM t GROUP BY a HAVING COUNT(*) > 1",),
        ("SELECT a FROM t ORDER BY a DESC LIMIT 10",),
        ("SELECT DISTINCT a FROM t",),
        # Joins.
        ("SELECT a FROM t1 JOIN t2 ON t1.id = t2.id",),
        ("SELECT a FROM t1 LEFT JOIN t2 ON t1.id = t2.id",),
        ("SELECT a FROM t1 RIGHT JOIN t2 ON t1.id = t2.id",),
        ("SELECT a FROM t1 CROSS JOIN t2",),
        # Subqueries.
        ("SELECT * FROM (SELECT a FROM t) AS sub",),
        ("SELECT a FROM t WHERE a IN (SELECT b FROM t2)",),
        # Set operations.
        ("SELECT a FROM t1 UNION ALL SELECT a FROM t2",),
        ("SELECT a FROM t1 UNION SELECT a FROM t2",),
        ("SELECT a FROM t1 INTERSECT SELECT a FROM t2",),
        ("SELECT a FROM t1 EXCEPT SELECT a FROM t2",),
        # Predicates and expressions.
        ("SELECT a IS NULL FROM t",),
        # IS NOT NULL is expected to be emitted as NOT ... IS NULL.
        ("SELECT a IS NOT NULL FROM t", "SELECT NOT a IS NULL FROM t"),
        ("SELECT CASE WHEN a > 0 THEN 'pos' WHEN a < 0 THEN 'neg' ELSE 'zero' END FROM t",),
        ("SELECT a BETWEEN 1 AND 10 FROM t",),
        # NOT BETWEEN is expected to be emitted as NOT ... BETWEEN.
        ("SELECT a NOT BETWEEN 1 AND 10 FROM t", "SELECT NOT a BETWEEN 1 AND 10 FROM t"),
        ("SELECT a LIKE '%hello%' FROM t",),
        ("SELECT a RLIKE '^[0-9]+$' FROM t",),
    )
    for args in cases:
        self.validate_identity(*args)
1288+
9691289

9701290
if __name__ == "__main__":
    # Run the tests when this module is executed directly as a script.
    unittest.main()

0 commit comments

Comments
 (0)