Skip to content

Commit 0f6f68c

Browse files
committed
Merge branch 'main' into codegen_scala_udf
2 parents b1e1c55 + 9c76e87 commit 0f6f68c

3 files changed

Lines changed: 106 additions & 1 deletion

File tree

docs/source/contributor-guide/spark_expressions_support.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@
517517
- [ ] collation
518518
- [x] concat_ws
519519
- [x] contains
520-
- [ ] decode
520+
- [x] decode
521521
- [ ] elt
522522
- [ ] encode
523523
- [x] endswith

docs/source/user-guide/latest/expressions.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ of expressions that can be disabled.
6363
| Concat |
6464
| ConcatWs |
6565
| Contains |
66+
| Decode |
6667
| EndsWith |
6768
| InitCap |
6869
| Left |
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- Tests for the SQL `decode` function.
19+
--
20+
-- Spark's `decode` is overloaded:
21+
-- * decode(bin, charset) -> StringDecode (charset binary->string)
22+
-- * decode(expr, search, result, ..., [default]) -> CaseWhen with EqualNullSafe branches
23+
--
24+
-- The Oracle-style form is implemented in Spark via the RuntimeReplaceable trait, so by the
25+
-- time Comet sees the plan the wrapper has already been replaced with CaseWhen and Comet
26+
-- handles it through its existing CaseWhen + EqualNullSafe serde.
27+
--
28+
-- The 2-arg charset form lowers to a cast(binary, string) inside Comet's stringDecode
29+
-- handler, but only when the charset is 'utf-8' (case-insensitive). Other charsets fall
30+
-- back to Spark JVM execution.
31+
32+
-- ===========================================================================
33+
-- Charset form: decode(bin, charset) for UTF-8 (the supported native path)
34+
-- ===========================================================================
35+
36+
-- Fixture: a single binary column holding ASCII values, an empty value,
-- a non-ASCII value ('café'), and a NULL row.
statement
37+
CREATE TABLE test_decode_utf8(b binary) USING parquet
38+
39+
statement
40+
INSERT INTO test_decode_utf8 VALUES (CAST('hello' AS BINARY)), (CAST('world' AS BINARY)), (CAST('' AS BINARY)),
41+
(CAST('café' AS BINARY)), (NULL)
42+
43+
-- Column input with the charset name spelled in lowercase.
query
44+
SELECT decode(b, 'utf-8') FROM test_decode_utf8
45+
46+
-- Same query with the charset name in uppercase; the file header states the
-- 'utf-8' check is case-insensitive.
query
47+
SELECT decode(b, 'UTF-8') FROM test_decode_utf8
48+
49+
-- Literal (non-column) inputs: a non-empty value, an empty value, and NULL.
query
50+
SELECT decode(CAST('hello' AS BINARY), 'utf-8'), decode(CAST('' AS BINARY), 'utf-8'), decode(NULL, 'utf-8')
51+
52+
-- Charset form: decode(bin, charset) for non-UTF-8 charsets (expected to fall back to Spark)
53+
54+
-- Fixture: ASCII-only values ('ab', 'abcd'), an empty value, and a NULL row.
-- NOTE(review): the even byte lengths are presumably chosen so the data is
-- also well-formed for UTF-16BE -- confirm.
statement
55+
CREATE TABLE test_decode_charset_safe(b binary) USING parquet
56+
57+
statement
58+
INSERT INTO test_decode_charset_safe VALUES (CAST('ab' AS BINARY)), (CAST('abcd' AS BINARY)), (CAST('' AS BINARY)), (NULL)
59+
60+
-- Each query below uses a charset other than 'utf-8' and asserts the fallback
-- reason given in expect_fallback(...).
query expect_fallback(Comet only supports decoding with 'utf-8'.)
61+
SELECT decode(b, 'UTF-16BE') FROM test_decode_charset_safe
62+
63+
query expect_fallback(Comet only supports decoding with 'utf-8'.)
64+
SELECT decode(b, 'US-ASCII') FROM test_decode_charset_safe
65+
66+
-- NOTE(review): this query reads test_decode_utf8 (which contains the
-- non-ASCII value 'café') rather than test_decode_charset_safe -- confirm
-- that this is intentional.
query expect_fallback(Comet only supports decoding with 'utf-8'.)
67+
SELECT decode(b, 'ISO-8859-1') FROM test_decode_utf8
68+
69+
70+
-- Oracle-style form: decode(expr, search, result, ..., [default])
--
-- Fixture: a string column and an int column, each containing one NULL.
statement
71+
CREATE TABLE test_decode_oracle(status string, code int) USING parquet
72+
73+
statement
74+
INSERT INTO test_decode_oracle VALUES ('A', 1), ('I', 2), ('X', 3), (NULL, 4), ('A', NULL)
75+
76+
-- String column, two search/result pairs, explicit default.
query
77+
SELECT decode(status, 'A', 'Active', 'I', 'Inactive', 'Other') FROM test_decode_oracle
78+
79+
-- String column, two search/result pairs, no default.
query
80+
SELECT decode(status, 'A', 'Active', 'I', 'Inactive') FROM test_decode_oracle
81+
82+
-- Int column, three search/result pairs, explicit default.
query
83+
SELECT decode(code, 1, 'one', 2, 'two', 3, 'three', 'unknown') FROM test_decode_oracle
84+
85+
-- Int column, two search/result pairs, no default.
query
86+
SELECT decode(code, 1, 'one', 2, 'two') FROM test_decode_oracle
87+
88+
-- NULL used as a search value; the file header notes that branches are
-- compared with EqualNullSafe.
query
89+
SELECT decode(status, 'A', 'has-A', NULL, 'is-null', 'other') FROM test_decode_oracle
90+
91+
-- Minimal form: a single search/result pair and no default.
query
92+
SELECT decode(status, 'A', 'Active') FROM test_decode_oracle
93+
94+
-- Literal-only arguments: the expression (2) equals one of the search values.
query
95+
SELECT decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 'Other')
96+
97+
-- Literal-only arguments: the expression (6) equals no search value; default supplied.
query
98+
SELECT decode(6, 1, 'Southlake', 2, 'San Francisco', 'Other')
99+
100+
-- Literal-only arguments: the expression (6) equals no search value; no default.
query
101+
SELECT decode(6, 1, 'Southlake', 2, 'San Francisco')
102+
103+
-- NULL expression with NULL among the search values.
query
104+
SELECT decode(NULL, 6, 'Spark', NULL, 'SQL', 4, 'rocks')

0 commit comments

Comments
 (0)