|
| 1 | +-- Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +-- or more contributor license agreements. See the NOTICE file |
| 3 | +-- distributed with this work for additional information |
| 4 | +-- regarding copyright ownership. The ASF licenses this file |
| 5 | +-- to you under the Apache License, Version 2.0 (the |
| 6 | +-- "License"); you may not use this file except in compliance |
| 7 | +-- with the License. You may obtain a copy of the License at |
| 8 | +-- |
| 9 | +-- http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +-- |
| 11 | +-- Unless required by applicable law or agreed to in writing, |
| 12 | +-- software distributed under the License is distributed on an |
| 13 | +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +-- KIND, either express or implied. See the License for the |
| 15 | +-- specific language governing permissions and limitations |
| 16 | +-- under the License. |
| 17 | + |
| 18 | +-- Tests for the SQL `decode` function. |
| 19 | +-- |
| 20 | +-- Spark's `decode` is overloaded: |
| 21 | +-- * decode(bin, charset) -> StringDecode (charset binary->string) |
| 22 | +-- * decode(expr, search, result, ..., [default]) -> CaseWhen with EqualNullSafe branches |
| 23 | +-- |
| 24 | +-- The Oracle-style form is implemented in Spark via the RuntimeReplaceable trait, so by the |
| 25 | +-- time Comet sees the plan the wrapper has already been replaced with CaseWhen and Comet |
| 26 | +-- handles it through its existing CaseWhen + EqualNullSafe serde. |
| 27 | +-- |
| 28 | +-- The 2-arg charset form lowers to a cast(binary, string) inside Comet's stringDecode |
| 29 | +-- handler, but only when the charset is 'utf-8' (case-insensitive). Other charsets fall |
| 30 | +-- back to Spark JVM execution. |
| 31 | + |
| 32 | +-- =========================================================================== |
| 33 | +-- Charset form: decode(bin, charset) with 'utf-8' / 'UTF-8' (the supported native path). Rows cover ASCII text, the empty string, non-ASCII text and NULL. |
| 34 | +-- =========================================================================== |
| 35 | + |
| 36 | +statement |
| 37 | +CREATE TABLE test_decode_utf8(b binary) USING parquet |
| 38 | + |
| 39 | +statement |
| 40 | +INSERT INTO test_decode_utf8 VALUES (CAST('hello' AS BINARY)), (CAST('world' AS BINARY)), (CAST('' AS BINARY)), |
| 41 | + (CAST('café' AS BINARY)), (NULL) |
| 42 | + |
| 43 | +query |
| 44 | +SELECT decode(b, 'utf-8') FROM test_decode_utf8 |
| 45 | + |
| 46 | +query |
| 47 | +SELECT decode(b, 'UTF-8') FROM test_decode_utf8 |
| 48 | + |
| 49 | +query |
| 50 | +SELECT decode(CAST('hello' AS BINARY), 'utf-8'), decode(CAST('' AS BINARY), 'utf-8'), decode(NULL, 'utf-8') |
| 51 | + |
| 52 | +-- Charset form: non-UTF-8 charsets; every query below expects a fallback to Spark. NOTE(review): the ISO-8859-1 query reads test_decode_utf8, not test_decode_charset_safe; confirm this is intentional. |
| 53 | + |
| 54 | +statement |
| 55 | +CREATE TABLE test_decode_charset_safe(b binary) USING parquet |
| 56 | + |
| 57 | +statement |
| 58 | +INSERT INTO test_decode_charset_safe VALUES (CAST('ab' AS BINARY)), (CAST('abcd' AS BINARY)), (CAST('' AS BINARY)), (NULL) |
| 59 | + |
| 60 | +query expect_fallback(Comet only supports decoding with 'utf-8'.) |
| 61 | +SELECT decode(b, 'UTF-16BE') FROM test_decode_charset_safe |
| 62 | + |
| 63 | +query expect_fallback(Comet only supports decoding with 'utf-8'.) |
| 64 | +SELECT decode(b, 'US-ASCII') FROM test_decode_charset_safe |
| 65 | + |
| 66 | +query expect_fallback(Comet only supports decoding with 'utf-8'.) |
| 67 | +SELECT decode(b, 'ISO-8859-1') FROM test_decode_utf8 |
| 68 | + |
| 69 | +-- Oracle-style form: decode(expr, search, result, ..., [default]). Covers string and int columns, calls with and without a default, a NULL search value, and literal-only arguments. |
| 70 | +statement |
| 71 | +CREATE TABLE test_decode_oracle(status string, code int) USING parquet |
| 72 | + |
| 73 | +statement |
| 74 | +INSERT INTO test_decode_oracle VALUES ('A', 1), ('I', 2), ('X', 3), (NULL, 4), ('A', NULL) |
| 75 | + |
| 76 | +query |
| 77 | +SELECT decode(status, 'A', 'Active', 'I', 'Inactive', 'Other') FROM test_decode_oracle |
| 78 | + |
| 79 | +query |
| 80 | +SELECT decode(status, 'A', 'Active', 'I', 'Inactive') FROM test_decode_oracle |
| 81 | + |
| 82 | +query |
| 83 | +SELECT decode(code, 1, 'one', 2, 'two', 3, 'three', 'unknown') FROM test_decode_oracle |
| 84 | + |
| 85 | +query |
| 86 | +SELECT decode(code, 1, 'one', 2, 'two') FROM test_decode_oracle |
| 87 | + |
| 88 | +query |
| 89 | +SELECT decode(status, 'A', 'has-A', NULL, 'is-null', 'other') FROM test_decode_oracle |
| 90 | + |
| 91 | +query |
| 92 | +SELECT decode(status, 'A', 'Active') FROM test_decode_oracle |
| 93 | + |
| 94 | +query |
| 95 | +SELECT decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 'Other') |
| 96 | + |
| 97 | +query |
| 98 | +SELECT decode(6, 1, 'Southlake', 2, 'San Francisco', 'Other') |
| 99 | + |
| 100 | +query |
| 101 | +SELECT decode(6, 1, 'Southlake', 2, 'San Francisco') |
| 102 | + |
| 103 | +query |
| 104 | +SELECT decode(NULL, 6, 'Spark', NULL, 'SQL', 4, 'rocks') |
0 commit comments