Skip to content

Commit 73f569b

Browse files
xiangfu0claude
andcommitted
Increase default TDigest compression for star-tree from 100 to 500
t-digest 3.3 has higher merge-order sensitivity than 3.2. At the previous default compression=100, multi-level merge in star-tree building produces ~0.35% error vs ground truth (compared to 0.06% with 3.2). Increasing to 500 brings error back to ~0.09%. Changes: - PercentileTDigestValueAggregator: DEFAULT_TDIGEST_COMPRESSION 100->500 - StarTreeBuilderUtils: always inject default compression into expression context so the aggregator receives it explicitly - StarTreeV2BuilderConfig: persist compression=500 in functionParameters when using old functionColumnPairs syntax, so canUseStarTree() can distinguish old segments (no metadata, compression=100) from new segments (metadata=500) Backward compatibility: - Old segments have no compressionFactor in metadata -> canUseStarTree() treats them as compression=100 -> matched by queries with default compression (100) - New segments have compressionFactor=500 in metadata -> matched by queries explicitly specifying compression=500 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0b130ef commit 73f569b

3 files changed

Lines changed: 34 additions & 3 deletions

File tree

pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/PercentileTDigestValueAggregator.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,11 @@
2929
public class PercentileTDigestValueAggregator implements ValueAggregator<Object, TDigest> {
3030
public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES;
3131

32-
// TODO: This is copied from PercentileTDigestAggregationFunction.
33-
public static final int DEFAULT_TDIGEST_COMPRESSION = 100;
32+
// NOTE: This is intentionally higher than the query-time default (100) in PercentileTDigestAggregationFunction.
33+
// t-digest 3.3 has higher merge-order sensitivity, and star-tree building involves multi-level merge with
34+
// intermediate serialization/deserialization. At compression=100, this causes ~0.35% error (vs 0.06% in 3.2).
35+
// Compression=500 brings error back to ~0.09%, comparable to 3.2's accuracy at compression=100.
36+
public static final int DEFAULT_TDIGEST_COMPRESSION = 500;
3437
private final int _compressionFactor;
3538
private int _maxByteSize;
3639

pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.commons.io.FileUtils;
3535
import org.apache.pinot.common.request.Literal;
3636
import org.apache.pinot.common.request.context.ExpressionContext;
37+
import org.apache.pinot.segment.local.aggregator.PercentileTDigestValueAggregator;
3738
import org.apache.pinot.segment.local.startree.v2.builder.StarTreeV2BuilderConfig;
3839
import org.apache.pinot.segment.spi.AggregationFunctionType;
3940
import org.apache.pinot.segment.spi.Constants;
@@ -395,6 +396,12 @@ public static List<ExpressionContext> expressionContextFromFunctionParameters(
395396
expressionContexts.add(ExpressionContext.forLiteral(
396397
Literal.intValue(Integer.parseInt(String.valueOf(
397398
functionParameters.get(Constants.PERCENTILETDIGEST_COMPRESSION_FACTOR_KEY))))));
399+
} else {
400+
// Always inject the default compression so it is stored in star-tree metadata.
401+
// This allows canUseStarTree() to distinguish old segments (no metadata, built with compression=100)
402+
// from new segments (metadata present, built with the current default).
403+
expressionContexts.add(ExpressionContext.forLiteral(
404+
Literal.intValue(PercentileTDigestValueAggregator.DEFAULT_TDIGEST_COMPRESSION)));
398405
}
399406
break;
400407
}

pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@
3232
import org.apache.commons.configuration2.Configuration;
3333
import org.apache.commons.lang3.builder.ToStringBuilder;
3434
import org.apache.commons.lang3.builder.ToStringStyle;
35+
import org.apache.pinot.segment.local.aggregator.PercentileTDigestValueAggregator;
3536
import org.apache.pinot.segment.spi.AggregationFunctionType;
3637
import org.apache.pinot.segment.spi.ColumnMetadata;
38+
import org.apache.pinot.segment.spi.Constants;
3739
import org.apache.pinot.segment.spi.SegmentMetadata;
3840
import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
3941
import org.apache.pinot.segment.spi.index.startree.AggregationSpec;
@@ -80,7 +82,8 @@ public static StarTreeV2BuilderConfig fromIndexConfig(StarTreeIndexConfig indexC
8082
AggregationFunctionColumnPair.resolveToStoredType(aggregationFunctionColumnPair);
8183
// If there is already an equivalent functionColumnPair in the map, do not load another.
8284
// This prevents the duplication of the aggregation when the StarTree is constructed.
83-
aggregationSpecs.putIfAbsent(storedType, AggregationSpec.DEFAULT);
85+
aggregationSpecs.putIfAbsent(storedType,
86+
getDefaultAggregationSpec(aggregationFunctionColumnPair.getFunctionType()));
8487
}
8588
}
8689
if (indexConfig.getAggregationConfigs() != null) {
@@ -323,4 +326,22 @@ public String toString() {
323326
.append("skipStarNodeCreation", _skipStarNodeCreationForDimensions)
324327
.append("aggregationSpecs", _aggregationSpecs).append("maxLeafRecords", _maxLeafRecords).toString();
325328
}
329+
330+
/**
331+
* Returns the default {@link AggregationSpec} for the given function type. For PERCENTILETDIGEST and
332+
* PERCENTILERAWTDIGEST, the default compression factor is explicitly included in the function parameters so that
333+
* it is persisted in star-tree metadata. This allows {@code canUseStarTree()} to distinguish old segments (built
334+
* with the previous default of 100, which have no compression metadata) from new segments.
335+
*/
336+
private static AggregationSpec getDefaultAggregationSpec(AggregationFunctionType functionType) {
337+
switch (functionType) {
338+
case PERCENTILETDIGEST:
339+
case PERCENTILERAWTDIGEST:
340+
return new AggregationSpec(null, null, null, null, null,
341+
Map.of(Constants.PERCENTILETDIGEST_COMPRESSION_FACTOR_KEY,
342+
PercentileTDigestValueAggregator.DEFAULT_TDIGEST_COMPRESSION));
343+
default:
344+
return AggregationSpec.DEFAULT;
345+
}
346+
}
326347
}

0 commit comments

Comments
 (0)