Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4291,6 +4291,42 @@ public void approxMostFrequentTest() {
DATABASE_NAME);
}

/**
 * Checks approx_percentile with percentage 0.5, first aggregated over the whole of table1 and
 * then grouped by (time, province).
 */
@Test
public void approxPercentileTest() {
  // Whole-table aggregation over time and s1..s4: a single result row is expected.
  final String wholeTableSql =
      "select approx_percentile(time, 0.5),approx_percentile(s1,0.5),approx_percentile(s2,0.5),approx_percentile(s3,0.5),approx_percentile(s4,0.5) from table1";
  final String[] wholeTableHeaders = buildHeaders(5);
  final String[] wholeTableRows = new String[] {"2024-09-24T06:15:40.000Z,40,46000,40.0,46.0,"};
  tableResultSetEqualTest(wholeTableSql, wholeTableHeaders, wholeTableRows, DATABASE_NAME);

  // Grouped aggregation: one row per (time, province), ordered by province and then by time.
  final String groupedSql =
      "select time,province,approx_percentile(s1,0.5),approx_percentile(s2,0.5) from table1 group by 1,2 order by 2,1";
  final String[] groupedHeaders = new String[] {"time", "province", "_col2", "_col3"};
  final String[] groupedRows =
      new String[] {
        "2024-09-24T06:15:30.000Z,beijing,30,0,",
        "2024-09-24T06:15:31.000Z,beijing,0,31000,",
        "2024-09-24T06:15:35.000Z,beijing,0,35000,",
        "2024-09-24T06:15:36.000Z,beijing,36,0,",
        "2024-09-24T06:15:40.000Z,beijing,40,40000,",
        "2024-09-24T06:15:41.000Z,beijing,41,0,",
        "2024-09-24T06:15:46.000Z,beijing,0,46000,",
        "2024-09-24T06:15:50.000Z,beijing,0,50000,",
        "2024-09-24T06:15:51.000Z,beijing,0,0,",
        "2024-09-24T06:15:55.000Z,beijing,55,0,",
        "2024-09-24T06:15:30.000Z,shanghai,30,0,",
        "2024-09-24T06:15:31.000Z,shanghai,0,31000,",
        "2024-09-24T06:15:35.000Z,shanghai,0,35000,",
        "2024-09-24T06:15:36.000Z,shanghai,36,0,",
        "2024-09-24T06:15:40.000Z,shanghai,40,40000,",
        "2024-09-24T06:15:41.000Z,shanghai,41,0,",
        "2024-09-24T06:15:46.000Z,shanghai,0,46000,",
        "2024-09-24T06:15:50.000Z,shanghai,0,50000,",
        "2024-09-24T06:15:51.000Z,shanghai,0,0,",
        "2024-09-24T06:15:55.000Z,shanghai,55,0,",
      };
  tableResultSetEqualTest(groupedSql, groupedHeaders, groupedRows, DATABASE_NAME);
}

@Test
public void exceptionTest() {
tableAssertTestFail(
Expand Down Expand Up @@ -4353,6 +4389,22 @@ public void exceptionTest() {
"select approx_most_frequent() from table1",
"701: Aggregation functions [approx_most_frequent] should only have three arguments",
DATABASE_NAME);
tableAssertTestFail(
"select approx_percentile() from table1",
"701: Aggregation functions [approx_percentile] should only have two or three arguments",
DATABASE_NAME);
tableAssertTestFail(
"select approx_percentile(s1,1.1) from table1",
"701: percentage should be in [0,1], got 1.1",
DATABASE_NAME);
tableAssertTestFail(
"select approx_percentile(s1,'test') from table1",
"701: The second argument of 'approx_percentile' function percentage must be a double literal",
DATABASE_NAME);
tableAssertTestFail(
"select approx_percentile(s5,0.5) from table1",
"701: Aggregation functions [approx_percentile] should have value column as numeric type [INT32, INT64, FLOAT, DOUBLE, TIMESTAMP]",
DATABASE_NAME);
}

// ==================================================================
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation;

import org.apache.iotdb.db.exception.sql.SemanticException;
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.approximate.TDigest;

import org.apache.tsfile.block.column.Column;
import org.apache.tsfile.block.column.ColumnBuilder;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.file.metadata.statistics.Statistics;
import org.apache.tsfile.utils.Binary;
import org.apache.tsfile.utils.RamUsageEstimator;
import org.apache.tsfile.utils.ReadWriteIOUtils;
import org.apache.tsfile.write.UnSupportedDataTypeException;

import java.nio.ByteBuffer;

/**
 * Base class for the table-model APPROX_PERCENTILE accumulators.
 *
 * <p>Input values are collected into a {@link TDigest}. The requested percentage is read from the
 * last argument column on every {@link #addInput} call and travels with the intermediate state,
 * which is laid out as an 8-byte double (the percentage) followed by the serialized digest.
 * Subclasses supply the per-type loops that feed values into {@link #tDigest}.
 */
public abstract class AbstractApproxPercentileAccumulator implements TableAccumulator {
  // NOTE(review): the shallow size is measured on ApproxPercentileAccumulator rather than on the
  // concrete runtime class; confirm this is accurate for every subclass.
  private static final long INSTANCE_SIZE =
      RamUsageEstimator.shallowSizeOfInstance(ApproxPercentileAccumulator.class);

  /** Digest that raw input values and merged intermediate states are added to. */
  protected final TDigest tDigest = new TDigest();

  /** Data type of the value column; selects the input loop and the output writer. */
  protected final TSDataType seriesDataType;

  /** Percentage passed to {@link TDigest#quantile}; refreshed by every input/intermediate batch. */
  protected double percentage;

  AbstractApproxPercentileAccumulator(TSDataType seriesDataType) {
    this.seriesDataType = seriesDataType;
  }

  /** Returns the shallow instance size plus the size reported by the digest. */
  @Override
  public long getEstimatedSize() {
    return INSTANCE_SIZE + tDigest.getEstimatedSize();
  }

  /**
   * Creates a fresh accumulator for the same value type.
   *
   * <p>NOTE(review): this always returns an {@link ApproxPercentileAccumulator}; subclasses of a
   * different concrete type presumably need to override it - confirm.
   */
  @Override
  public TableAccumulator copy() {
    return new ApproxPercentileAccumulator(seriesDataType);
  }

  /**
   * Reads the percentage from the last argument column and dispatches to the type-specific input
   * method.
   *
   * @param arguments two columns (value, percentage) or three columns (percentage is the third)
   * @param mask selection mask forwarded to the type-specific input method
   * @throws SemanticException if the number of arguments is neither 2 nor 3
   * @throws UnSupportedDataTypeException if the value type is not INT32, INT64, TIMESTAMP, FLOAT
   *     or DOUBLE
   */
  @Override
  public void addInput(Column[] arguments, AggregationMask mask) {
    // Only position 0 of the percentage column is consulted.
    // assumes the column is non-empty and holds one constant value - TODO confirm
    if (arguments.length == 2) {
      percentage = arguments[1].getDouble(0);
    } else if (arguments.length == 3) {
      percentage = arguments[2].getDouble(0);
    } else {
      throw new SemanticException(
          String.format(
              "APPROX_PERCENTILE requires 2 or 3 arguments, but got %d", arguments.length));
    }
    switch (seriesDataType) {
      case INT32:
        addIntInput(arguments, mask);
        return;
      case INT64:
      case TIMESTAMP:
        addLongInput(arguments, mask);
        return;
      case FLOAT:
        addFloatInput(arguments, mask);
        return;
      case DOUBLE:
        addDoubleInput(arguments, mask);
        return;
      default:
        throw new UnSupportedDataTypeException(
            String.format(
                "Unsupported data type in APPROX_PERCENTILE Aggregation: %s", seriesDataType));
    }
  }

  /**
   * Merges every non-null intermediate state of {@code argument} into this accumulator.
   *
   * <p>Each state must use the layout produced by {@link #evaluateIntermediate}: the percentage
   * first, then the serialized digest.
   */
  @Override
  public void addIntermediate(Column argument) {
    for (int i = 0; i < argument.getPositionCount(); i++) {
      if (!argument.isNull(i)) {
        byte[] data = argument.getBinary(i).getValues();
        // Read percentage from the first 8 bytes and TDigest from the rest
        ByteBuffer buffer = ByteBuffer.wrap(data);
        this.percentage = ReadWriteIOUtils.readDouble(buffer);
        TDigest other = TDigest.fromByteBuffer(buffer);
        tDigest.add(other);
      }
    }
  }

  /** Writes the percentage followed by the serialized digest as a single binary value. */
  @Override
  public void evaluateIntermediate(ColumnBuilder columnBuilder) {
    int tDigestDataLength = tDigest.byteSize();
    // Create a buffer with space for percentage (one double) + TDigest data
    ByteBuffer buffer = ByteBuffer.allocate(Double.BYTES + tDigestDataLength);
    ReadWriteIOUtils.write(percentage, buffer);
    tDigest.toByteArray(buffer);
    columnBuilder.writeBinary(new Binary(buffer.array()));
  }

  /**
   * Writes the digest's quantile at {@link #percentage}, cast to the value column's type.
   *
   * <p>NOTE(review): nothing here distinguishes an accumulator that never received a value; the
   * output then depends on what {@code TDigest.quantile} returns for an empty digest - confirm
   * whether a null should be written instead.
   *
   * @throws UnSupportedDataTypeException if the value type is not INT32, INT64, TIMESTAMP, FLOAT
   *     or DOUBLE
   */
  @Override
  public void evaluateFinal(ColumnBuilder columnBuilder) {
    switch (seriesDataType) {
      case INT32:
        columnBuilder.writeInt((int) tDigest.quantile(percentage));
        break;
      case INT64:
      case TIMESTAMP:
        columnBuilder.writeLong((long) tDigest.quantile(percentage));
        break;
      case FLOAT:
        columnBuilder.writeFloat((float) tDigest.quantile(percentage));
        break;
      case DOUBLE:
        columnBuilder.writeDouble(tDigest.quantile(percentage));
        break;
      default:
        throw new UnSupportedDataTypeException(
            String.format(
                "Unsupported data type in APPROX_PERCENTILE Aggregation: %s", seriesDataType));
    }
  }

  /** Always {@code false}. */
  @Override
  public boolean hasFinalResult() {
    return false;
  }

  /** Not supported: always throws {@link UnsupportedOperationException}. */
  @Override
  public void addStatistics(Statistics[] statistics) {
    throw new UnsupportedOperationException(
        "ApproxPercentileAccumulator does not support statistics");
  }

  /** Resets the digest; the last seen percentage is left untouched. */
  @Override
  public void reset() {
    tDigest.reset();
  }

  /** Adds the selected INT32 values of {@code arguments} to the digest. */
  public abstract void addIntInput(Column[] arguments, AggregationMask mask);

  /** Adds the selected INT64/TIMESTAMP values of {@code arguments} to the digest. */
  public abstract void addLongInput(Column[] arguments, AggregationMask mask);

  /** Adds the selected FLOAT values of {@code arguments} to the digest. */
  public abstract void addFloatInput(Column[] arguments, AggregationMask mask);

  /** Adds the selected DOUBLE values of {@code arguments} to the digest. */
  public abstract void addDoubleInput(Column[] arguments, AggregationMask mask);

  /**
   * Converts {@code value} to a double, failing when the conversion would lose precision.
   *
   * @param value the long to convert
   * @return {@code value} as a double that represents it exactly
   * @throws SemanticException if {@code value} has no exact double representation
   */
  public static double toDoubleExact(long value) {
    double doubleValue = (double) value;
    // The double -> long cast saturates, so 2^63 converts back to Long.MAX_VALUE. The round trip
    // alone would therefore accept Long.MAX_VALUE (2^63 - 1) even though it rounds up to 2^63
    // as a double; reject it explicitly.
    if (value == Long.MAX_VALUE || (long) doubleValue != value) {
      throw new SemanticException(
          String.format("no exact double representation for long: %s", value));
    }
    return doubleValue;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.FloatGroupedApproxMostFrequentAccumulator;
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedAccumulator;
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedApproxCountDistinctAccumulator;
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedApproxPercentileAccumulator;
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedApproxPercentileWithWeightAccumulator;
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedAvgAccumulator;
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedCountAccumulator;
import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedCountAllAccumulator;
Expand Down Expand Up @@ -256,6 +258,12 @@ private static GroupedAccumulator createBuiltinGroupedAccumulator(
return new GroupedApproxCountDistinctAccumulator(inputDataTypes.get(0));
case APPROX_MOST_FREQUENT:
return getGroupedApproxMostFrequentAccumulator(inputDataTypes.get(0));
case APPROX_PERCENTILE:
if (inputDataTypes.size() == 2) {
return new GroupedApproxPercentileAccumulator(inputDataTypes.get(0));
} else {
return new GroupedApproxPercentileWithWeightAccumulator(inputDataTypes.get(0));
}
default:
throw new IllegalArgumentException("Invalid Aggregation function: " + aggregationType);
}
Expand Down Expand Up @@ -325,6 +333,12 @@ public static TableAccumulator createBuiltinAccumulator(
return new ApproxCountDistinctAccumulator(inputDataTypes.get(0));
case APPROX_MOST_FREQUENT:
return getApproxMostFrequentAccumulator(inputDataTypes.get(0));
case APPROX_PERCENTILE:
if (inputDataTypes.size() == 2) {
return new ApproxPercentileAccumulator(inputDataTypes.get(0));
} else {
return new ApproxPercentileWithWeightAccumulator(inputDataTypes.get(0));
}
default:
throw new IllegalArgumentException("Invalid Aggregation function: " + aggregationType);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation;

import org.apache.tsfile.block.column.Column;
import org.apache.tsfile.enums.TSDataType;

/**
 * Unweighted APPROX_PERCENTILE accumulator: every selected, non-null entry of the first argument
 * column is added to the inherited digest.
 *
 * <p>NOTE(review): in the masked branch each loop is bounded by {@code mask.getPositionCount()}
 * while indexing {@code mask.getSelectedPositions()}; confirm that this count equals the number
 * of selected positions.
 */
public class ApproxPercentileAccumulator extends AbstractApproxPercentileAccumulator {

  public ApproxPercentileAccumulator(TSDataType seriesDataType) {
    super(seriesDataType);
  }

  /** Feeds the selected non-null INT32 values of {@code arguments[0]} into the digest. */
  @Override
  public void addIntInput(Column[] arguments, AggregationMask mask) {
    final Column values = arguments[0];
    final int maskPositions = mask.getPositionCount();

    if (!mask.isSelectAll()) {
      // Partial selection: visit only the rows listed by the mask.
      final int[] selected = mask.getSelectedPositions();
      for (int idx = 0; idx < maskPositions; idx++) {
        final int row = selected[idx];
        if (values.isNull(row)) {
          continue;
        }
        tDigest.add(values.getInt(row));
      }
      return;
    }

    // Everything is selected: walk the whole column.
    for (int row = 0; row < values.getPositionCount(); row++) {
      if (values.isNull(row)) {
        continue;
      }
      tDigest.add(values.getInt(row));
    }
  }

  /**
   * Feeds the selected non-null INT64/TIMESTAMP values of {@code arguments[0]} into the digest,
   * converting each one with {@code toDoubleExact}.
   */
  @Override
  public void addLongInput(Column[] arguments, AggregationMask mask) {
    final Column values = arguments[0];
    final int maskPositions = mask.getPositionCount();

    if (!mask.isSelectAll()) {
      // Partial selection: visit only the rows listed by the mask.
      final int[] selected = mask.getSelectedPositions();
      for (int idx = 0; idx < maskPositions; idx++) {
        final int row = selected[idx];
        if (values.isNull(row)) {
          continue;
        }
        tDigest.add(toDoubleExact(values.getLong(row)));
      }
      return;
    }

    // Everything is selected: walk the whole column.
    for (int row = 0; row < values.getPositionCount(); row++) {
      if (values.isNull(row)) {
        continue;
      }
      tDigest.add(toDoubleExact(values.getLong(row)));
    }
  }

  /** Feeds the selected non-null FLOAT values of {@code arguments[0]} into the digest. */
  @Override
  public void addFloatInput(Column[] arguments, AggregationMask mask) {
    final Column values = arguments[0];
    final int maskPositions = mask.getPositionCount();

    if (!mask.isSelectAll()) {
      // Partial selection: visit only the rows listed by the mask.
      final int[] selected = mask.getSelectedPositions();
      for (int idx = 0; idx < maskPositions; idx++) {
        final int row = selected[idx];
        if (values.isNull(row)) {
          continue;
        }
        tDigest.add(values.getFloat(row));
      }
      return;
    }

    // Everything is selected: walk the whole column.
    for (int row = 0; row < values.getPositionCount(); row++) {
      if (values.isNull(row)) {
        continue;
      }
      tDigest.add(values.getFloat(row));
    }
  }

  /** Feeds the selected non-null DOUBLE values of {@code arguments[0]} into the digest. */
  @Override
  public void addDoubleInput(Column[] arguments, AggregationMask mask) {
    final Column values = arguments[0];
    final int maskPositions = mask.getPositionCount();

    if (!mask.isSelectAll()) {
      // Partial selection: visit only the rows listed by the mask.
      final int[] selected = mask.getSelectedPositions();
      for (int idx = 0; idx < maskPositions; idx++) {
        final int row = selected[idx];
        if (values.isNull(row)) {
          continue;
        }
        tDigest.add(values.getDouble(row));
      }
      return;
    }

    // Everything is selected: walk the whole column.
    for (int row = 0; row < values.getPositionCount(); row++) {
      if (values.isNull(row)) {
        continue;
      }
      tDigest.add(values.getDouble(row));
    }
  }
}
Loading
Loading