Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,25 +40,24 @@ public Object result(ListAccumulator accumulator) {

@Override
public ListAccumulator add(ListAccumulator acc, Object... values) {
// Handle case where no values are passed
if (values == null || values.length == 0) {
return acc;
}

Object value = values[0];

// Filter out null values and enforce 100-item limit
// Preserve raw element type so the result matches the ARG0_ARRAY return-type
// declaration on PPLBuiltinOperators.LIST. Stringifying here would break Calcite's
// type-checking and the response-boundary UDT dispatch for ip / binary columns.
if (value != null && acc.size() < DEFAULT_LIMIT) {
// Convert value to string, handling all types safely
String stringValue = String.valueOf(value);
acc.add(stringValue);
acc.add(value);
}

return acc;
}

public static class ListAccumulator implements Accumulator {
private final List<String> values;
private final List<Object> values;

public ListAccumulator() {
this.values = new ArrayList<>();
Expand All @@ -69,7 +68,7 @@ public Object value(Object... argList) {
return values;
}

public void add(String value) {
public void add(Object value) {
values.add(value);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.opensearch.sql.calcite.CalcitePlanContext;
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
import org.opensearch.sql.common.response.ResponseListener;
import org.opensearch.sql.data.model.ExprCollectionValue;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
Expand Down Expand Up @@ -215,6 +216,8 @@ private List<ExprValue> convertRows(Iterable<Object[]> rows, List<RelDataTypeFie
* IpFieldMapper}'s {@code valueFetcher} output).
* <li>{@link BinaryType} + {@code byte[]} &rarr; base64-encoded string (matches the OpenSearch
* {@code binary} field wire format).
* <li>{@code ARRAY<IpType>} / {@code ARRAY<BinaryType>} + {@code List<byte[]>} &rarr;
* element-wise UDT-aware conversion for {@code list(ip|binary)} aggregates.
* <li>Anything else &rarr; existing {@link ExprValueUtils#fromObjectValue} path.
* </ul>
*
Expand All @@ -224,19 +227,55 @@ private List<ExprValue> convertRows(Iterable<Object[]> rows, List<RelDataTypeFie
private static ExprValue toExprValue(Object value, RelDataType type) {
if (value instanceof byte[] bytes) {
if (type instanceof IpType) {
try {
return ExprValueUtils.stringValue(
InetAddresses.toAddrString(InetAddress.getByAddress(bytes)));
} catch (UnknownHostException e) {
throw new IllegalStateException("invalid IP buffer length: " + bytes.length, e);
}
return ipBytesToExprValue(bytes);
} else if (type instanceof BinaryType) {
return ExprValueUtils.stringValue(Base64.getEncoder().encodeToString(bytes));
return binaryBytesToExprValue(bytes);
}
}
if (value instanceof List<?> list) {
RelDataType component = type.getComponentType();
if (component instanceof IpType) {
List<ExprValue> elems = new ArrayList<>(list.size());
for (Object elem : list) {
if (elem == null) {
elems.add(ExprValueUtils.nullValue());
} else if (elem instanceof byte[] eb) {
elems.add(ipBytesToExprValue(eb));
} else {
elems.add(ExprValueUtils.fromObjectValue(elem));
}
}
return new ExprCollectionValue(elems);
} else if (component instanceof BinaryType) {
List<ExprValue> elems = new ArrayList<>(list.size());
for (Object elem : list) {
if (elem == null) {
elems.add(ExprValueUtils.nullValue());
} else if (elem instanceof byte[] eb) {
elems.add(binaryBytesToExprValue(eb));
} else {
elems.add(ExprValueUtils.fromObjectValue(elem));
}
}
return new ExprCollectionValue(elems);
}
}
return ExprValueUtils.fromObjectValue(value);
}

private static ExprValue ipBytesToExprValue(byte[] bytes) {
try {
return ExprValueUtils.stringValue(
InetAddresses.toAddrString(InetAddress.getByAddress(bytes)));
} catch (UnknownHostException e) {
throw new IllegalStateException("invalid IP buffer length: " + bytes.length, e);
}
}

private static ExprValue binaryBytesToExprValue(byte[] bytes) {
return ExprValueUtils.stringValue(Base64.getEncoder().encodeToString(bytes));
}

private Schema buildSchema(List<RelDataTypeField> fields) {
List<Schema.Column> columns = new ArrayList<>();
for (RelDataTypeField field : fields) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,10 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
"pattern",
ReturnTypes.explicit(UserDefinedFunctionUtils.nullablePatternAggList),
null);
// ARG0_ARRAY preserves UDT element type so AnalyticsExecutionEngine can render IP/binary byte[].
public static final SqlAggFunction LIST =
createUserDefinedAggFunction(
ListAggFunction.class, "LIST", PPLReturnTypes.STRING_ARRAY, PPLOperandTypes.ANY_SCALAR);
ListAggFunction.class, "LIST", PPLReturnTypes.ARG0_ARRAY, PPLOperandTypes.ANY_SCALAR);
Comment thread
vinaykpud marked this conversation as resolved.
public static final SqlAggFunction VALUES =
createUserDefinedAggFunction(
ValuesAggFunction.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public void testListFunctionWithBoolean() throws IOException {
"source=%s | stats list(boolean_value) as bool_list",
TEST_INDEX_DATATYPE_NONNUMERIC));
verifySchema(response, schema("bool_list", "array"));
verifyDataRows(response, rows(List.of("true")));
verifyDataRows(response, rows(List.of(true)));

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ps48 Do u remember any reasons we enforce return ARRAY(STRING) in PPL?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was based on industry standards and workings of multivalue stats commands.

@vinaykpud vinaykpud Jun 5, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you provide example on this why the return value is always kept string?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here's the RFC and the decisions we made through the initial implementation #4026

@vinaykpud vinaykpud Jun 5, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @penghuo @ps48 , I fixed it by rewriting the plan which will keep the contract same.
opensearch-project/OpenSearch#21997

}

@Test
Expand All @@ -53,7 +53,7 @@ public void testListFunctionWithByte() throws IOException {
String.format(
"source=%s | stats list(byte_number) as byte_list", TEST_INDEX_DATATYPE_NUMERIC));
verifySchema(response, schema("byte_list", "array"));
verifyDataRows(response, rows(List.of("4")));
verifyDataRows(response, rows(List.of(4)));
}

@Test
Expand All @@ -63,7 +63,7 @@ public void testListFunctionWithShort() throws IOException {
String.format(
"source=%s | stats list(short_number) as short_list", TEST_INDEX_DATATYPE_NUMERIC));
verifySchema(response, schema("short_list", "array"));
verifyDataRows(response, rows(List.of("3")));
verifyDataRows(response, rows(List.of(3)));
}

@Test
Expand All @@ -73,7 +73,7 @@ public void testListFunctionWithInteger() throws IOException {
String.format(
"source=%s | stats list(integer_number) as int_list", TEST_INDEX_DATATYPE_NUMERIC));
verifySchema(response, schema("int_list", "array"));
verifyDataRows(response, rows(List.of("2")));
verifyDataRows(response, rows(List.of(2)));
}

@Test
Expand All @@ -83,7 +83,7 @@ public void testListFunctionWithLong() throws IOException {
String.format(
"source=%s | stats list(long_number) as long_list", TEST_INDEX_DATATYPE_NUMERIC));
verifySchema(response, schema("long_list", "array"));
verifyDataRows(response, rows(List.of("1")));
verifyDataRows(response, rows(List.of(1)));
}

@Test
Expand All @@ -93,7 +93,7 @@ public void testListFunctionWithFloat() throws IOException {
String.format(
"source=%s | stats list(float_number) as float_list", TEST_INDEX_DATATYPE_NUMERIC));
verifySchema(response, schema("float_list", "array"));
verifyDataRows(response, rows(List.of("6.2")));
verifyDataRows(response, rows(List.of(6.2)));
}

@Test
Expand All @@ -104,7 +104,7 @@ public void testListFunctionWithDouble() throws IOException {
"source=%s | stats list(double_number) as double_list",
TEST_INDEX_DATATYPE_NUMERIC));
verifySchema(response, schema("double_list", "array"));
verifyDataRows(response, rows(List.of("5.1")));
verifyDataRows(response, rows(List.of(5.1)));
}

@Test
Expand Down Expand Up @@ -188,7 +188,7 @@ public void testListFunctionWithNullValues() throws IOException {
String.format("source=%s | head 5 | stats list(int0) as int_list", TEST_INDEX_CALCS));
verifySchema(response, schema("int_list", "array"));
// Nulls are filtered out by list function
verifyDataRows(response, rows(List.of("1", "7")));
verifyDataRows(response, rows(List.of(1, 7)));
}

@Test
Expand Down Expand Up @@ -217,7 +217,7 @@ public void testListFunctionMultipleFields() throws IOException {
assert response.has("datarows");
// Values should be collected from the first 3 rows (str2 and int2 columns)
// The actual values depend on the test data - int2 column contains 5, -4, 5
verifyDataRows(response, rows(List.of("one", "two", "three"), List.of("5", "-4", "5")));
verifyDataRows(response, rows(List.of("one", "two", "three"), List.of(5, -4, 5)));
}

@Test
Expand Down Expand Up @@ -275,7 +275,7 @@ public void testListFunctionWithArithmeticExpression() throws IOException {
String.format(
"source=%s | head 3 | stats list(int3 + 1) as arithmetic_list", TEST_INDEX_CALCS));
verifySchema(response, schema("arithmetic_list", "array"));
verifyDataRows(response, rows(List.of("9", "14", "3")));
verifyDataRows(response, rows(List.of(9, 14, 3)));
}

// ==================== VALUES Function Tests ====================
Expand Down
Loading