@@ -966,6 +966,326 @@ def test_inherited_json_functions(self):
966966 self .validate_identity ("SELECT GET_JSON_OBJECT(s, '$.key')" )
967967 self .validate_identity ("SELECT JSON_TUPLE(s, 'k1', 'k2')" )
968968
969+ # -------------------------------------------------------------------------
970+ # Math functions (comprehensive)
971+ # -------------------------------------------------------------------------
972+
973+ def test_math_functions (self ):
974+ """Arithmetic, exponential, trigonometric, bitwise, and misc math."""
975+ # Basic arithmetic
976+ self .validate_identity ("SELECT ABS(-5)" )
977+ self .validate_identity ("SELECT CEIL(1.5)" )
978+ self .validate_identity ("SELECT FLOOR(1.9)" )
979+ self .validate_identity ("SELECT ROUND(3.14)" )
980+ self .validate_identity ("SELECT ROUND(3.14159, 2)" )
981+ self .validate_identity ("SELECT SQRT(4)" )
982+ # POW → POWER (SQL standard name; both valid in MaxCompute)
983+ self .validate_identity ("SELECT POW(2, 10)" , "SELECT POWER(2, 10)" )
984+ # Exponential / logarithmic
985+ self .validate_identity ("SELECT EXP(1)" )
986+ self .validate_identity ("SELECT LN(1.0)" )
987+ # Single-arg LOG → LN (MaxCompute LOG requires 2 args: LOG(base, x))
988+ self .validate_identity ("SELECT LOG(1.0)" , "SELECT LN(1.0)" )
989+ # LOG10 → LOG(10, x)
990+ self .validate_identity ("SELECT LOG10(100)" , "SELECT LOG(10, 100)" )
991+ self .validate_all (
992+ "SELECT LOG10(100)" ,
993+ write = {"maxcompute" : "SELECT LOG(10, 100)" , "spark" : "SELECT LOG(10, 100)" , "hive" : "SELECT LOG(10, 100)" },
994+ )
995+ # Trigonometric
996+ self .validate_identity ("SELECT SIN(0)" )
997+ self .validate_identity ("SELECT COS(0)" )
998+ self .validate_identity ("SELECT TAN(0)" )
999+ self .validate_identity ("SELECT ASIN(0)" )
1000+ self .validate_identity ("SELECT ACOS(1)" )
1001+ self .validate_identity ("SELECT ATAN(1)" )
1002+ self .validate_identity ("SELECT ATAN2(1, 1)" )
1003+ self .validate_identity ("SELECT DEGREES(3.14159)" )
1004+ self .validate_identity ("SELECT RADIANS(180)" )
1005+ # Constants
1006+ self .validate_identity ("SELECT PI()" )
1007+ self .validate_identity ("SELECT E()" )
1008+ # Sign / polarity
1009+ self .validate_identity ("SELECT SIGN(-5)" )
1010+ self .validate_identity ("SELECT NEGATIVE(-3)" )
1011+ self .validate_identity ("SELECT POSITIVE(3)" )
1012+ # Number base / bitwise
1013+ self .validate_identity ("SELECT BIN(10)" )
1014+ self .validate_identity ("SELECT HEX(255)" )
1015+ self .validate_identity ("SELECT UNHEX('FF')" )
1016+ self .validate_identity ("SELECT CONV('FF', 16, 10)" )
1017+ self .validate_identity ("SELECT SHIFTLEFT(1, 3)" )
1018+ self .validate_identity ("SELECT SHIFTRIGHT(8, 2)" )
1019+ # Misc
1020+ self .validate_identity ("SELECT RAND()" )
1021+ self .validate_identity ("SELECT RAND(42)" )
1022+ self .validate_identity ("SELECT ISNAN(1.0)" )
1023+ self .validate_identity ("SELECT WIDTH_BUCKET(5, 0, 10, 5)" )
1024+ # Cross-dialect: GREATEST/LEAST are universal
1025+ self .validate_all (
1026+ "SELECT GREATEST(a, b, c)" ,
1027+ write = {"maxcompute" : "SELECT GREATEST(a, b, c)" , "spark" : "SELECT GREATEST(a, b, c)" , "duckdb" : "SELECT GREATEST(a, b, c)" },
1028+ )
1029+ self .validate_all (
1030+ "SELECT LEAST(a, b, c)" ,
1031+ write = {"maxcompute" : "SELECT LEAST(a, b, c)" , "spark" : "SELECT LEAST(a, b, c)" , "duckdb" : "SELECT LEAST(a, b, c)" },
1032+ )
1033+
1034+ # -------------------------------------------------------------------------
1035+ # String functions (extended)
1036+ # -------------------------------------------------------------------------
1037+
1038+ def test_string_functions_extended (self ):
1039+ """String functions not covered by test_inherited_string_functions."""
1040+ # Length
1041+ self .validate_identity ("SELECT LENGTH('hello')" )
1042+ self .validate_identity ("SELECT LENGTHB('hello')" )
1043+ # Character
1044+ self .validate_identity ("SELECT ASCII('A')" )
1045+ self .validate_identity ("SELECT CHR(65)" )
1046+ # Trim (LTRIM/RTRIM covered in inherited; TRIM is the two-sided form)
1047+ self .validate_identity ("SELECT TRIM(' hello ')" )
1048+ # Concatenation (CONCAT_WS covered in inherited)
1049+ self .validate_identity ("SELECT CONCAT('a', 'b', 'c')" )
1050+ # Manipulation
1051+ self .validate_identity ("SELECT REPLACE('hello world', 'world', 'there')" )
1052+ self .validate_identity ("SELECT TRANSLATE('hello', 'el', 'ip')" )
1053+ self .validate_identity ("SELECT SOUNDEX('hello')" )
1054+ # Hashing / encoding
1055+ self .validate_identity ("SELECT MD5('hello')" )
1056+ self .validate_identity ("SELECT SHA2('hello', 256)" )
1057+ self .validate_identity ("SELECT BASE64('hello')" )
1058+ self .validate_identity ("SELECT URL_ENCODE('hello world')" )
1059+ self .validate_identity ("SELECT URL_DECODE('hello+world')" )
1060+ self .validate_identity ("SELECT CRC32('hello')" )
1061+ # LOCATE(substr, str) → INSTR(str, substr) — arg order swap
1062+ self .validate_all (
1063+ "LOCATE('lo', 'hello')" ,
1064+ write = {"maxcompute" : "INSTR('hello', 'lo')" },
1065+ )
1066+ # Regexp (REGEXP_REPLACE and REGEXP_EXTRACT_ALL in inherited; adding REGEXP_EXTRACT and COUNT)
1067+ self .validate_identity ("SELECT REGEXP_EXTRACT('hello123', '[0-9]+', 0)" )
1068+ self .validate_identity ("SELECT REGEXP_COUNT('hello123world456', '[0-9]+')" )
1069+ self .validate_all (
1070+ "SELECT REGEXP_EXTRACT('hello123', '[0-9]+', 0)" ,
1071+ write = {
1072+ "maxcompute" : "SELECT REGEXP_EXTRACT('hello123', '[0-9]+', 0)" ,
1073+ "spark" : "SELECT REGEXP_EXTRACT('hello123', '[0-9]+', 0)" ,
1074+ "hive" : "SELECT REGEXP_EXTRACT('hello123', '[0-9]+', 0)" ,
1075+ },
1076+ )
1077+ # NVL → COALESCE (both work in MaxCompute; normalized to COALESCE)
1078+ self .validate_all (
1079+ "NVL(a, 'default')" ,
1080+ write = {"maxcompute" : "COALESCE(a, 'default')" , "spark" : "COALESCE(a, 'default')" },
1081+ )
1082+ # DECODE → CASE WHEN (MaxCompute DECODE is an Oracle-style alias)
1083+ self .validate_all (
1084+ "DECODE(x, 1, 'one', 2, 'two', 'other')" ,
1085+ write = {
1086+ "maxcompute" : "CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END" ,
1087+ "hive" : "CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END" ,
1088+ },
1089+ )
1090+
1091+ # -------------------------------------------------------------------------
1092+ # Window functions
1093+ # -------------------------------------------------------------------------
1094+
1095+ def test_window_functions (self ):
1096+ """Ranking, navigation, and aggregate window functions with frames."""
1097+ # Ranking functions
1098+ self .validate_identity ("SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t" )
1099+ self .validate_identity ("SELECT RANK() OVER (ORDER BY b DESC) FROM t" )
1100+ self .validate_identity ("SELECT DENSE_RANK() OVER (PARTITION BY a ORDER BY b) FROM t" )
1101+ self .validate_identity ("SELECT NTILE(4) OVER (ORDER BY b) FROM t" )
1102+ self .validate_identity ("SELECT PERCENT_RANK() OVER (ORDER BY b) FROM t" )
1103+ self .validate_identity ("SELECT CUME_DIST() OVER (ORDER BY b) FROM t" )
1104+ # Navigation functions
1105+ self .validate_identity ("SELECT LAG(col, 1, 0) OVER (ORDER BY b) FROM t" )
1106+ self .validate_identity ("SELECT LEAD(col, 1, NULL) OVER (ORDER BY b) FROM t" )
1107+ self .validate_identity ("SELECT FIRST_VALUE(col) OVER (ORDER BY b) FROM t" )
1108+ self .validate_identity (
1109+ "SELECT LAST_VALUE(col) OVER (ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
1110+ )
1111+ self .validate_identity ("SELECT NTH_VALUE(col, 2) OVER (ORDER BY b) FROM t" )
1112+ # Aggregate windows with frame clauses
1113+ self .validate_identity (
1114+ "SELECT SUM(x) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) FROM t"
1115+ )
1116+ self .validate_identity ("SELECT AVG(x) OVER (PARTITION BY a) FROM t" )
1117+ self .validate_identity (
1118+ "SELECT COUNT(*) OVER (PARTITION BY a ORDER BY b RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
1119+ )
1120+ self .validate_identity ("SELECT MAX(x) OVER (PARTITION BY a ORDER BY b) FROM t" )
1121+ self .validate_identity ("SELECT MIN(x) OVER (PARTITION BY a ORDER BY b) FROM t" )
1122+ # Cross-dialect: standard window functions are portable
1123+ self .validate_all (
1124+ "SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t" ,
1125+ write = {
1126+ "maxcompute" : "SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t" ,
1127+ "spark" : "SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t" ,
1128+ "hive" : "SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t" ,
1129+ },
1130+ )
1131+ self .validate_all (
1132+ "SELECT LAG(col, 1, 0) OVER (PARTITION BY a ORDER BY b) FROM t" ,
1133+ write = {
1134+ "maxcompute" : "SELECT LAG(col, 1, 0) OVER (PARTITION BY a ORDER BY b) FROM t" ,
1135+ "spark" : "SELECT LAG(col, 1, 0) OVER (PARTITION BY a ORDER BY b) FROM t" ,
1136+ },
1137+ )
1138+ self .validate_all (
1139+ "SELECT SUM(x) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t" ,
1140+ write = {
1141+ "maxcompute" : "SELECT SUM(x) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t" ,
1142+ "spark" : "SELECT SUM(x) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t" ,
1143+ },
1144+ )
1145+
1146+ # -------------------------------------------------------------------------
1147+ # Aggregate functions (extended)
1148+ # -------------------------------------------------------------------------
1149+
1150+ def test_aggregate_functions_extended (self ):
1151+ """Aggregate functions not covered by test_statistical_aggregates or test_inherited_aggregate_functions."""
1152+ # PERCENTILE_CONT / PERCENTILE_DISC (WITHIN GROUP syntax)
1153+ self .validate_identity ("SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x)" )
1154+ self .validate_identity ("SELECT PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY x)" )
1155+ self .validate_all (
1156+ "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary)" ,
1157+ write = {
1158+ "maxcompute" : "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary)" ,
1159+ "spark" : "SELECT PERCENTILE_APPROX(salary, 0.5)" ,
1160+ },
1161+ )
1162+ # Map aggregation
1163+ self .validate_identity ("SELECT MAP_AGG(k, v)" )
1164+ self .validate_all (
1165+ "SELECT MAP_AGG(k, v)" ,
1166+ write = {"maxcompute" : "SELECT MAP_AGG(k, v)" , "spark" : "SELECT MAP_AGG(k, v)" },
1167+ )
1168+ # STR_TO_MAP
1169+ self .validate_identity ("SELECT STR_TO_MAP('a:1,b:2', ',', ':')" )
1170+
1171+ # -------------------------------------------------------------------------
1172+ # Conditional and misc functions
1173+ # -------------------------------------------------------------------------
1174+
1175+ def test_conditional_and_misc (self ):
1176+ """IF, NULLIF, COALESCE, HASH, UUID, and related functions."""
1177+ # IF round-trips in MaxCompute; transpiles to CASE WHEN in dialects that lack it
1178+ self .validate_identity ("IF(a > 1, 'yes', 'no')" )
1179+ self .validate_all (
1180+ "IF(a > 1, 1, 0)" ,
1181+ write = {
1182+ "maxcompute" : "IF(a > 1, 1, 0)" ,
1183+ "spark" : "IF(a > 1, 1, 0)" ,
1184+ "postgres" : "CASE WHEN a > 1 THEN 1 ELSE 0 END" ,
1185+ },
1186+ )
1187+ # NULLIF / COALESCE
1188+ self .validate_identity ("NULLIF(a, 0)" )
1189+ self .validate_identity ("COALESCE(a, b, 'default')" )
1190+ self .validate_all (
1191+ "COALESCE(a, b)" ,
1192+ write = {"maxcompute" : "COALESCE(a, b)" , "spark" : "COALESCE(a, b)" , "postgres" : "COALESCE(a, b)" },
1193+ )
1194+ # Misc
1195+ self .validate_identity ("HASH(a)" )
1196+ self .validate_identity ("UUID()" )
1197+ self .validate_identity ("GET_USER_ID()" )
1198+
1199+ # -------------------------------------------------------------------------
1200+ # Type casting
1201+ # -------------------------------------------------------------------------
1202+
1203+ def test_type_casting (self ):
1204+ """CAST to all MaxCompute data types including complex types."""
1205+ # Numeric
1206+ self .validate_identity ("CAST(x AS BIGINT)" )
1207+ self .validate_identity ("CAST(x AS INT)" )
1208+ self .validate_identity ("CAST(x AS SMALLINT)" )
1209+ self .validate_identity ("CAST(x AS TINYINT)" )
1210+ self .validate_identity ("CAST(x AS FLOAT)" )
1211+ self .validate_identity ("CAST(x AS DOUBLE)" )
1212+ self .validate_identity ("CAST(x AS DECIMAL(10, 2))" )
1213+ self .validate_identity ("CAST(x AS BOOLEAN)" )
1214+ self .validate_identity ("CAST(x AS BINARY)" )
1215+ # String: VARCHAR/CHAR collapse to STRING in MaxCompute
1216+ self .validate_identity ("CAST(x AS STRING)" )
1217+ self .validate_identity ("CAST(x AS VARCHAR(100))" , "CAST(x AS STRING)" )
1218+ self .validate_identity ("CAST(x AS CHAR(10))" , "CAST(x AS STRING)" )
1219+ # Date/time
1220+ self .validate_identity ("CAST(x AS DATE)" )
1221+ self .validate_identity ("CAST(x AS DATETIME)" )
1222+ self .validate_identity ("CAST(x AS TIMESTAMP)" )
1223+ # Complex types
1224+ self .validate_identity ("CAST(x AS ARRAY<STRING>)" )
1225+ self .validate_identity ("CAST(x AS MAP<STRING, BIGINT>)" )
1226+ # Cross-dialect: STRING is the canonical string type in Hive-family dialects
1227+ self .validate_all (
1228+ "CAST(x AS STRING)" ,
1229+ write = {"maxcompute" : "CAST(x AS STRING)" , "spark" : "CAST(x AS STRING)" , "hive" : "CAST(x AS STRING)" },
1230+ )
1231+
1232+ # -------------------------------------------------------------------------
1233+ # DDL (extended)
1234+ # -------------------------------------------------------------------------
1235+
1236+ def test_ddl_extended (self ):
1237+ """DDL: column/table comments, IF NOT EXISTS, STORED AS."""
1238+ # Column and table-level COMMENT
1239+ self .validate_identity (
1240+ "CREATE TABLE t (id BIGINT COMMENT 'primary key', name STRING) COMMENT 'user table'"
1241+ )
1242+ # IF NOT EXISTS
1243+ self .validate_identity ("CREATE TABLE IF NOT EXISTS t (id BIGINT)" )
1244+ # STORED AS: MaxCompute/Hive emit format name only (no STORED AS keyword)
1245+ self .validate_identity (
1246+ "CREATE TABLE t (id BIGINT) STORED AS PARQUET" ,
1247+ "CREATE TABLE t (id BIGINT) PARQUET" ,
1248+ )
1249+ self .validate_identity (
1250+ "CREATE TABLE t (id BIGINT) STORED AS ORC" ,
1251+ "CREATE TABLE t (id BIGINT) ORC" ,
1252+ )
1253+
1254+ # -------------------------------------------------------------------------
1255+ # DML patterns
1256+ # -------------------------------------------------------------------------
1257+
1258+ def test_dml (self ):
1259+ """Core DML: SELECT, JOIN, subquery, set operations."""
1260+ # Basic clauses
1261+ self .validate_identity ("SELECT a, b FROM t WHERE a > 1" )
1262+ self .validate_identity ("SELECT a, COUNT(*) FROM t GROUP BY a HAVING COUNT(*) > 1" )
1263+ self .validate_identity ("SELECT a FROM t ORDER BY a DESC LIMIT 10" )
1264+ self .validate_identity ("SELECT DISTINCT a FROM t" )
1265+ # Joins
1266+ self .validate_identity ("SELECT a FROM t1 JOIN t2 ON t1.id = t2.id" )
1267+ self .validate_identity ("SELECT a FROM t1 LEFT JOIN t2 ON t1.id = t2.id" )
1268+ self .validate_identity ("SELECT a FROM t1 RIGHT JOIN t2 ON t1.id = t2.id" )
1269+ self .validate_identity ("SELECT a FROM t1 CROSS JOIN t2" )
1270+ # Subquery
1271+ self .validate_identity ("SELECT * FROM (SELECT a FROM t) AS sub" )
1272+ self .validate_identity ("SELECT a FROM t WHERE a IN (SELECT b FROM t2)" )
1273+ # Set operations
1274+ self .validate_identity ("SELECT a FROM t1 UNION ALL SELECT a FROM t2" )
1275+ self .validate_identity ("SELECT a FROM t1 UNION SELECT a FROM t2" )
1276+ self .validate_identity ("SELECT a FROM t1 INTERSECT SELECT a FROM t2" )
1277+ self .validate_identity ("SELECT a FROM t1 EXCEPT SELECT a FROM t2" )
1278+ # Expressions
1279+ self .validate_identity ("SELECT a IS NULL FROM t" )
1280+ # Hive/MaxCompute normalizes IS NOT NULL → NOT ... IS NULL
1281+ self .validate_identity ("SELECT a IS NOT NULL FROM t" , "SELECT NOT a IS NULL FROM t" )
1282+ self .validate_identity ("SELECT CASE WHEN a > 0 THEN 'pos' WHEN a < 0 THEN 'neg' ELSE 'zero' END FROM t" )
1283+ self .validate_identity ("SELECT a BETWEEN 1 AND 10 FROM t" )
1284+ # Hive/MaxCompute normalizes NOT BETWEEN → NOT ... BETWEEN
1285+ self .validate_identity ("SELECT a NOT BETWEEN 1 AND 10 FROM t" , "SELECT NOT a BETWEEN 1 AND 10 FROM t" )
1286+ self .validate_identity ("SELECT a LIKE '%hello%' FROM t" )
1287+ self .validate_identity ("SELECT a RLIKE '^[0-9]+$' FROM t" )
1288+
9691289
9701290if __name__ == "__main__" :
9711291 unittest .main ()
0 commit comments