Skip to content

Commit 4c7bf03

Browse files
committed
Fix string concat incompatibility from UTF-8 charset on literals
The previous fix added the UTF-8 charset only to string literals in visitLiteral(), leaving column VARCHAR types with no charset. Calcite then rejected string concatenation (e.g. 'Hello ' + firstname) with the error: VARCHAR CHARACTER SET "UTF-8" NOT NULL is not comparable to VARCHAR. Fix: move the UTF-8 + IMPLICIT collation enforcement into OpenSearchTypeFactory.createSqlType() for VARCHAR/CHAR so that both column types and literal types consistently carry the same charset. visitLiteral() reverts to plain createSqlType() calls since the factory now handles the encoding globally. Signed-off-by: Radhakrishnan Pachyappan <gingeekrishna@gmail.com>
1 parent 2afb328 commit 4c7bf03

2 files changed

Lines changed: 20 additions & 16 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
1313

1414
import java.math.BigDecimal;
15-
import java.nio.charset.StandardCharsets;
1615
import java.util.ArrayList;
1716
import java.util.Collections;
1817
import java.util.HashMap;
@@ -32,7 +31,6 @@
3231
import org.apache.calcite.rex.RexLambdaRef;
3332
import org.apache.calcite.rex.RexLiteral;
3433
import org.apache.calcite.rex.RexNode;
35-
import org.apache.calcite.sql.SqlCollation;
3634
import org.apache.calcite.sql.SqlIntervalQualifier;
3735
import org.apache.calcite.sql.SqlOperator;
3836
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
@@ -133,28 +131,20 @@ public RexNode visitLiteral(Literal node, CalcitePlanContext context) {
133131
case NULL:
134132
return rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.NULL));
135133
case STRING:
136-
// Use UTF-8 explicitly so non-ASCII characters (e.g. Chinese, Arabic) are not rejected
137-
// by Calcite's NlsString, which otherwise defaults to ISO-8859-1.
134+
// UTF-8 charset is applied globally by OpenSearchTypeFactory.createSqlType() for
135+
// VARCHAR/CHAR, so non-ASCII characters (e.g. Chinese, Arabic) are handled correctly
136+
// and literal types stay compatible with column types for string operations.
138137
if (value.toString().length() == 1) {
139138
// To align Spark/PostgreSQL, Char(1) is useful, such as cast('1' to boolean) should
140139
// return true
141140
return rexBuilder.makeLiteral(
142-
value.toString(),
143-
typeFactory.createTypeWithCharsetAndCollation(
144-
typeFactory.createSqlType(SqlTypeName.CHAR),
145-
StandardCharsets.UTF_8,
146-
SqlCollation.IMPLICIT));
141+
value.toString(), typeFactory.createSqlType(SqlTypeName.CHAR));
147142
} else {
148143
// Specify the type as VARCHAR and set allowCast to true, or the STRING will be optimized to
149144
// CHAR(n)
150145
// which leads to incorrect return type in deriveReturnType of some functions/operators
151146
return rexBuilder.makeLiteral(
152-
value.toString(),
153-
typeFactory.createTypeWithCharsetAndCollation(
154-
typeFactory.createSqlType(SqlTypeName.VARCHAR),
155-
StandardCharsets.UTF_8,
156-
SqlCollation.IMPLICIT),
157-
true);
147+
value.toString(), typeFactory.createSqlType(SqlTypeName.VARCHAR), true);
158148
}
159149
case INTEGER:
160150
return rexBuilder.makeExactLiteral(new BigDecimal((Integer) value));

core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
import java.lang.reflect.Type;
3434
import java.nio.charset.Charset;
35+
import java.nio.charset.StandardCharsets;
3536
import java.util.ArrayList;
3637
import java.util.LinkedHashMap;
3738
import java.util.List;
@@ -104,8 +105,21 @@ public RelDataType createTypeWithCharsetAndCollation(
104105
return super.createTypeWithCharsetAndCollation(type, charset, collation);
105106
}
106107

108+
@Override
109+
public RelDataType createSqlType(SqlTypeName typeName) {
110+
RelDataType type = super.createSqlType(typeName);
111+
if (typeName == SqlTypeName.VARCHAR || typeName == SqlTypeName.CHAR) {
112+
return createTypeWithCharsetAndCollation(type, StandardCharsets.UTF_8, SqlCollation.IMPLICIT);
113+
}
114+
return type;
115+
}
116+
107117
public RelDataType createSqlType(SqlTypeName typeName, boolean nullable) {
108-
return createTypeWithNullability(super.createSqlType(typeName), nullable);
118+
RelDataType type = createTypeWithNullability(super.createSqlType(typeName), nullable);
119+
if (typeName == SqlTypeName.VARCHAR || typeName == SqlTypeName.CHAR) {
120+
return createTypeWithCharsetAndCollation(type, StandardCharsets.UTF_8, SqlCollation.IMPLICIT);
121+
}
122+
return type;
109123
}
110124

111125
public RelDataType createStructType(

0 commit comments

Comments
 (0)