Skip to content

Commit d243cad

Browse files
Deep Patelclaude
authored andcommitted
Address review: return RoaringBitmap/BitSet directly from getDictIdBitmap/getDictIdBitSet
- getDictIdBitmap now returns wrapper._dictIdBitmap (RoaringBitmap) directly - getDictIdBitSet now returns dictIdsWrapper._bitSet (BitSet) directly - Removed now-unused helper methods from DictIdsWrapper and GroupByDictIdsWrapper - Updated all call sites to use the raw types directly - Test: assert reference equality in batch-reuse test to directly verify wrapper reuse Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 3eca00d commit d243cad

2 files changed

Lines changed: 34 additions & 39 deletions

File tree

pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountHLLAggregationFunction.java

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,10 @@ protected void aggregateSV(int length, AggregationResultHolder aggregationResult
119119
Dictionary dictionary = blockValSet.getDictionary();
120120
if (dictionary != null) {
121121
int[] dictIds = blockValSet.getDictionaryIdsSV();
122-
getDictIdBitSet(aggregationResultHolder, dictionary).addDictIds(dictIds, length);
122+
BitSet bitSet = getDictIdBitSet(aggregationResultHolder, dictionary);
123+
for (int i = 0; i < length; i++) {
124+
bitSet.set(dictIds[i]);
125+
}
123126
return;
124127
}
125128

@@ -167,9 +170,11 @@ protected void aggregateMV(int length, AggregationResultHolder aggregationResult
167170
Dictionary dictionary = blockValSet.getDictionary();
168171
if (dictionary != null) {
169172
int[][] dictIds = blockValSet.getDictionaryIdsMV();
170-
DictIdsWrapper dictIdsWrapper = getDictIdBitSet(aggregationResultHolder, dictionary);
173+
BitSet bitSet = getDictIdBitSet(aggregationResultHolder, dictionary);
171174
for (int i = 0; i < length; i++) {
172-
dictIdsWrapper.addDictIds(dictIds[i]);
175+
for (int dictId : dictIds[i]) {
176+
bitSet.set(dictId);
177+
}
173178
}
174179
return;
175180
}
@@ -638,30 +643,30 @@ public boolean canUseStarTree(Map<String, Object> functionParameters) {
638643
}
639644

640645
/**
641-
* Returns the {@link GroupByDictIdsWrapper} for the given group key, creating a new one if absent.
642-
* Uses a {@link RoaringBitmap} (sparse) so memory scales with distinct values per group, not dictionary size.
646+
* Returns the {@link RoaringBitmap} for the given group key, creating a new {@link GroupByDictIdsWrapper} if absent.
647+
* Uses a sparse bitmap so memory scales with distinct values per group, not dictionary size.
643648
*/
644-
protected static GroupByDictIdsWrapper getDictIdBitmap(GroupByResultHolder groupByResultHolder, int groupKey,
649+
protected static RoaringBitmap getDictIdBitmap(GroupByResultHolder groupByResultHolder, int groupKey,
645650
Dictionary dictionary) {
646651
GroupByDictIdsWrapper wrapper = groupByResultHolder.getResult(groupKey);
647652
if (wrapper == null) {
648653
wrapper = new GroupByDictIdsWrapper(dictionary);
649654
groupByResultHolder.setValueForKey(groupKey, wrapper);
650655
}
651-
return wrapper;
656+
return wrapper._dictIdBitmap;
652657
}
653658

654659
/**
655-
* Returns the {@link DictIdsWrapper} from the result holder, creating a new one if absent.
660+
* Returns the {@link BitSet} from the result holder, creating a new {@link DictIdsWrapper} if absent.
656661
*/
657-
protected static DictIdsWrapper getDictIdBitSet(AggregationResultHolder aggregationResultHolder,
662+
protected static BitSet getDictIdBitSet(AggregationResultHolder aggregationResultHolder,
658663
Dictionary dictionary) {
659664
DictIdsWrapper dictIdsWrapper = aggregationResultHolder.getResult();
660665
if (dictIdsWrapper == null) {
661666
dictIdsWrapper = new DictIdsWrapper(dictionary);
662667
aggregationResultHolder.setValue(dictIdsWrapper);
663668
}
664-
return dictIdsWrapper;
669+
return dictIdsWrapper._bitSet;
665670
}
666671

667672
/**
@@ -735,22 +740,6 @@ protected static final class DictIdsWrapper {
735740
_dictionary = dictionary;
736741
_bitSet = new BitSet(dictionary.length());
737742
}
738-
739-
void set(int dictId) {
740-
_bitSet.set(dictId);
741-
}
742-
743-
void addDictIds(int[] dictIds) {
744-
for (int dictId : dictIds) {
745-
_bitSet.set(dictId);
746-
}
747-
}
748-
749-
void addDictIds(int[] dictIds, int length) {
750-
for (int i = 0; i < length; i++) {
751-
_bitSet.set(dictIds[i]);
752-
}
753-
}
754743
}
755744

756745
/**
@@ -768,13 +757,5 @@ protected static final class GroupByDictIdsWrapper {
768757
_dictionary = dictionary;
769758
_dictIdBitmap = new RoaringBitmap();
770759
}
771-
772-
void add(int dictId) {
773-
_dictIdBitmap.add(dictId);
774-
}
775-
776-
void add(int[] dictIds) {
777-
_dictIdBitmap.add(dictIds);
778-
}
779760
}
780761
}

pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/DistinctCountHLLAggregationFunctionTest.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.pinot.core.query.aggregation.function;
2020

2121
import com.clearspring.analytics.stream.cardinality.HyperLogLog;
22+
import java.util.BitSet;
2223
import java.util.List;
2324
import java.util.Map;
2425
import org.apache.pinot.common.request.Literal;
@@ -89,7 +90,10 @@ public void testBitSetDeduplicationProducesCorrectCardinality() {
8990
List.of(ExpressionContext.forIdentifier("col"),
9091
ExpressionContext.forLiteral(Literal.intValue(12))));
9192

92-
DistinctCountHLLAggregationFunction.getDictIdBitSet(holder, dictionary).addDictIds(dictIds, dictIds.length);
93+
BitSet bitSet = DistinctCountHLLAggregationFunction.getDictIdBitSet(holder, dictionary);
94+
for (int dictId : dictIds) {
95+
bitSet.set(dictId);
96+
}
9397

9498
HyperLogLog result = function.extractAggregationResult(holder);
9599
// With log2m=12 the expected error is ~1.6%; allow 5% to be safe
@@ -121,7 +125,10 @@ public void testBitSetLargeCardinalityDictionary() {
121125
List.of(ExpressionContext.forIdentifier("col"),
122126
ExpressionContext.forLiteral(Literal.intValue(14))));
123127

124-
DistinctCountHLLAggregationFunction.getDictIdBitSet(holder, dictionary).addDictIds(dictIds, dictIds.length);
128+
BitSet bitSet = DistinctCountHLLAggregationFunction.getDictIdBitSet(holder, dictionary);
129+
for (int dictId : dictIds) {
130+
bitSet.set(dictId);
131+
}
125132

126133
HyperLogLog result = function.extractAggregationResult(holder);
127134
// log2m=14 gives ~0.8% error; allow 5%
@@ -152,14 +159,21 @@ public void testDictIdBitSetIsReusedAcrossBatches() {
152159
for (int i = 0; i < 100; i++) {
153160
batch1[i] = i;
154161
}
155-
DistinctCountHLLAggregationFunction.getDictIdBitSet(holder, dictionary).addDictIds(batch1, batch1.length);
162+
BitSet bitSet = DistinctCountHLLAggregationFunction.getDictIdBitSet(holder, dictionary);
163+
for (int dictId : batch1) {
164+
bitSet.set(dictId);
165+
}
156166

157-
// Batch 2: dict IDs 100–199 (using the same holder — wrapper must be reused)
167+
// Batch 2: dict IDs 100–199 (using the same holder — wrapper must be reused, same BitSet instance)
158168
int[] batch2 = new int[100];
159169
for (int i = 0; i < 100; i++) {
160170
batch2[i] = 100 + i;
161171
}
162-
DistinctCountHLLAggregationFunction.getDictIdBitSet(holder, dictionary).addDictIds(batch2, batch2.length);
172+
BitSet bitSet2 = DistinctCountHLLAggregationFunction.getDictIdBitSet(holder, dictionary);
173+
Assert.assertSame(bitSet, bitSet2, "getDictIdBitSet must return the same BitSet on subsequent calls");
174+
for (int dictId : batch2) {
175+
bitSet2.set(dictId);
176+
}
163177

164178
HyperLogLog result = function.extractAggregationResult(holder);
165179
Assert.assertEquals(result.cardinality(), numDistinct, numDistinct * 0.05,

0 commit comments

Comments
 (0)