Skip to content

Commit 03d7d2a

Browse files
committed
GITHUB#16249: Use the doc-values skip index to skip per-doc value lookups in LongRangeFacetCutter
1 parent 7787b70 commit 03d7d2a

6 files changed

Lines changed: 340 additions & 11 deletions

File tree

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,8 @@ Optimizations
536536

537537
* GITHUB#16228: Reuse scratch int[] for ordinal translation. (Tim Brooks)
538538

539+
* GITHUB#16249: Use the doc-values skip index to skip per-doc value lookups for dense blocks in LongRangeFacetCutter. (Jakub Slowinski)
540+
539541
Bug Fixes
540542
---------------------
541543
* GITHUB#15754: Fix HTMLStripCharFilter to prevent tags from incorrectly consuming subsequent

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java

Lines changed: 132 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@
2323
import org.apache.lucene.facet.MultiLongValues;
2424
import org.apache.lucene.facet.MultiLongValuesSource;
2525
import org.apache.lucene.facet.range.LongRange;
26+
import org.apache.lucene.index.DocValues;
27+
import org.apache.lucene.index.DocValuesSkipper;
28+
import org.apache.lucene.index.LeafReaderContext;
29+
import org.apache.lucene.index.NumericDocValues;
30+
import org.apache.lucene.index.SortedNumericDocValues;
2631
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
2732
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
2833
import org.apache.lucene.search.LongValues;
@@ -42,6 +47,10 @@ public abstract class LongRangeFacetCutter implements FacetCutter {
4247

4348
// TODO: refactor - weird that we have both multi and single here.
4449
final LongValuesSource singleValues;
50+
51+
// Field to read a DocValuesSkipper from on the single-valued path, or null when disabled.
52+
final String skipField;
53+
4554
final LongRangeAndPos[] sortedRanges;
4655

4756
final int requestedRangeCount;
@@ -62,32 +71,51 @@ static LongRangeFacetCutter createSingleOrMultiValued(
6271
MultiLongValuesSource longValuesSource,
6372
LongValuesSource singleLongValuesSource,
6473
LongRange[] longRanges) {
74+
return createSingleOrMultiValued(longValuesSource, singleLongValuesSource, longRanges, null);
75+
}
76+
77+
/**
* Same as the three-argument overload, but uses the {@code skipField} skip index on the
* single-valued path. A null {@code skipField} disables the skip index.
*/
78+
static LongRangeFacetCutter createSingleOrMultiValued(
79+
MultiLongValuesSource longValuesSource,
80+
LongValuesSource singleLongValuesSource,
81+
LongRange[] longRanges,
82+
String skipField) {
6583
if (areOverlappingRanges(longRanges)) {
6684
return new OverlappingLongRangeFacetCutter(
67-
longValuesSource, singleLongValuesSource, longRanges);
85+
longValuesSource, singleLongValuesSource, longRanges, skipField);
6886
}
6987
return new NonOverlappingLongRangeFacetCutter(
70-
longValuesSource, singleLongValuesSource, longRanges);
88+
longValuesSource, singleLongValuesSource, longRanges, skipField);
7189
}
7290

7391
public static LongRangeFacetCutter create(
7492
MultiLongValuesSource longValuesSource, LongRange[] longRanges) {
75-
return createSingleOrMultiValued(longValuesSource, null, longRanges);
93+
return createSingleOrMultiValued(longValuesSource, null, longRanges, null);
94+
}
95+
96+
/**
* Create {@link FacetCutter} for a long field by name, using its skip index when present.
*
* @param field name of the long field; used both to build the values source (via {@code
*     MultiLongValuesSource.fromLongField}) and as the skip field
* @param longRanges ranges to facet on
* @return the cutter returned by {@code createSingleOrMultiValued} for these arguments
*/
97+
public static LongRangeFacetCutter create(String field, LongRange[] longRanges) {
98+
// Leave the single-valued source null. The skip path reads the field directly, and a
99+
// multi-valued segment must fall back to the multi-valued leaf cutter.
100+
return createSingleOrMultiValued(
101+
MultiLongValuesSource.fromLongField(field), null, longRanges, field);
76102
}
77103

78104
// caller handles conversion of Doubles and DoubleRange to Long and LongRange
79105
// ranges need not be sorted
80106
LongRangeFacetCutter(
81107
MultiLongValuesSource longValuesSource,
82108
LongValuesSource singleLongValuesSource,
83-
LongRange[] longRanges) {
109+
LongRange[] longRanges,
110+
String skipField) {
84111
super();
85112
valuesSource = longValuesSource;
86113
if (singleLongValuesSource != null) {
87114
singleValues = singleLongValuesSource;
88115
} else {
89116
singleValues = MultiLongValuesSource.unwrapSingleton(valuesSource);
90117
}
118+
this.skipField = skipField;
91119

92120
sortedRanges = new LongRangeAndPos[longRanges.length];
93121
requestedRangeCount = longRanges.length;
@@ -124,6 +152,39 @@ public static LongRangeFacetCutter create(
124152
*/
125153
abstract List<InclusiveRange> buildElementaryIntervals();
126154

155+
/**
156+
* Returns the {@link DocValuesSkipper} for {@link #skipField} in this segment. Null when: no skip
157+
* field is configured, the field has no skip index, or some doc in this segment has more than one
158+
* value.
*
* <p>A null result means the skip-index path must not be used for this segment; callers are
* expected to null-check the result and fall back to their regular value source.
*
* @param context the segment to look the skipper up in
* @return the skipper, or null in the cases listed above
* @throws IOException if reading the doc values or the skipper fails
159+
*/
160+
final DocValuesSkipper maybeSkipper(LeafReaderContext context) throws IOException {
161+
if (skipField == null) {
162+
return null;
163+
}
// The skip path is single-valued only: when this segment's values do not unwrap to a
// singleton, report no skipper.
164+
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), skipField);
165+
if (DocValues.unwrapSingleton(sortedNumeric) == null) {
166+
return null;
167+
}
// NOTE(review): presumably null when the field has no skip index, as the Javadoc states —
// confirm against the reader's contract.
168+
return context.reader().getDocValuesSkipper(skipField);
169+
}
170+
171+
/**
* Single-valued {@link LongValues} for {@link #skipField} in this segment.
*
* <p>The unwrapped singleton is not null-checked here, so this should only be called for
* segments where {@link #maybeSkipper} returned non-null: that implies {@link #skipField} is set
* and the singleton unwrap succeeded.
*
* @param context the segment to read values from
* @return a {@link LongValues} that delegates to the field's {@link NumericDocValues}
* @throws IOException if opening the doc values fails
*/
172+
final LongValues skipFieldValues(LeafReaderContext context) throws IOException {
// Opens the field's doc values again; maybeSkipper does not hand over the instance it probed.
173+
NumericDocValues values =
174+
DocValues.unwrapSingleton(DocValues.getSortedNumeric(context.reader(), skipField));
// Thin adapter from NumericDocValues to the LongValues interface.
175+
return new LongValues() {
176+
@Override
177+
public long longValue() throws IOException {
178+
return values.longValue();
179+
}
180+
181+
@Override
182+
public boolean advanceExact(int doc) throws IOException {
183+
return values.advanceExact(doc);
184+
}
185+
};
186+
}
187+
127188
private static boolean areOverlappingRanges(LongRange[] ranges) {
128189
if (ranges.length == 0) {
129190
return false;
@@ -252,21 +313,52 @@ abstract static class LongRangeSingleValuedLeafFacetCutter implements LeafFacetC
252313

253314
IntervalTracker requestedIntervalTracker;
254315

316+
// Skip index for the faceted field, or null when disabled.
317+
private final DocValuesSkipper skipper;
318+
319+
// Cached decision from advanceSkipper, valid for every doc up to (and including) upToInclusive:
320+
// when upToSameInterval is true, all those docs map to elementary interval upToIntervalOrd.
321+
private int upToInclusive = -1;
322+
private boolean upToSameInterval;
323+
private int upToIntervalOrd;
324+
255325
/** Creates a leaf cutter without a skip index: delegates with a null {@code skipper}. */
LongRangeSingleValuedLeafFacetCutter(LongValues longValues, long[] boundaries, int[] pos) {
326+
this(longValues, boundaries, pos, null);
327+
}
328+
329+
/**
* Creates a leaf cutter that can optionally use a doc-values skip index.
*
* @param longValues per-doc values for the faceted field
* @param boundaries stored as-is in the {@code boundaries} field
* @param pos stored as-is in the {@code pos} field
* @param skipper skip index for the faceted field, or null to disable the skip path
*/
LongRangeSingleValuedLeafFacetCutter(
330+
LongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) {
256331
this.longValues = longValues;
257332
this.boundaries = boundaries;
258333
this.pos = pos;
334+
this.skipper = skipper;
335+
// The skip path counts a dense block as one value per doc, so it's single-valued only.
// This is an assert, so it is only enforced when assertions are enabled.
336+
assert skipper == null || skipper.maxValueCount() <= 1
337+
: "skip-index fast path requires a single-valued field, got maxValueCount="
338+
+ skipper.maxValueCount();
259339
}
260340

261341
@Override
262342
public boolean advanceExact(int doc) throws IOException {
263-
if (longValues.advanceExact(doc) == false) {
343+
if (skipper != null && doc > upToInclusive) {
344+
advanceSkipper(doc);
345+
}
346+
347+
int intervalOrd;
348+
if (upToSameInterval) {
349+
// We are inside a dense skip block that maps entirely to one elementary interval, so reuse
350+
// the cached ordinal and skip the per-doc value lookup and binary search.
351+
intervalOrd = upToIntervalOrd;
352+
} else if (longValues.advanceExact(doc)) {
353+
intervalOrd = processValue(longValues.longValue());
354+
} else {
264355
return false;
265356
}
357+
266358
if (requestedIntervalTracker != null) {
267359
requestedIntervalTracker.clear();
268360
}
269-
elementaryIntervalOrd = processValue(longValues.longValue());
361+
elementaryIntervalOrd = intervalOrd;
270362
maybeRollUp(requestedIntervalTracker);
271363
if (requestedIntervalTracker != null) {
272364
requestedIntervalTracker.freeze();
@@ -275,6 +367,40 @@ public boolean advanceExact(int doc) throws IOException {
275367
return true;
276368
}
277369

370+
/**
* Mirrors {@code HistogramCollector#advanceSkipper}.
*
* <p>Refreshes the cached skip decision ({@code upToInclusive}, {@code upToSameInterval} and
* {@code upToIntervalOrd}) for {@code doc}. The visible caller, {@code advanceExact}, invokes
* this only when {@code skipper} is non-null and {@code doc} is past {@code upToInclusive}.
*
* @param doc the doc ID being advanced to
* @throws IOException if advancing the skipper fails
*/
371+
private void advanceSkipper(int doc) throws IOException {
// Only move the skipper when doc is past the end of its current level-0 block.
372+
if (doc > skipper.maxDocID(0)) {
373+
skipper.advance(doc);
374+
}
// Default to per-doc lookups; only the level loop below re-enables the cached ordinal.
375+
upToSameInterval = false;
376+
377+
if (skipper.minDocID(0) > doc) {
378+
// Corner case which happens if doc doesn't have a value and is between two intervals of the
379+
// skip index. Fall back to per-doc lookups until the next block.
380+
upToInclusive = skipper.minDocID(0) - 1;
381+
return;
382+
}
383+
// doc is inside the level-0 block: the decision made below holds at least to the block's end.
384+
upToInclusive = skipper.maxDocID(0);
385+
// Now find the highest level where all docs have a value and map to the same interval.
386+
for (int level = 0; level < skipper.numLevels(); ++level) {
// A level is dense when its doc count equals the size of its doc ID span.
387+
int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1;
388+
if (skipper.docCount(level) != totalDocsAtLevel) {
389+
// Some docs at this level have no value, so we can't resolve the whole block at once.
390+
break;
391+
}
392+
// Long fields store raw values, so the skipper's min/max map straight into the boundary
// space.
393+
int minInterval = processValue(skipper.minValue(level));
394+
int maxInterval = processValue(skipper.maxValue(level));
// Different intervals for min and max: per-doc lookups are needed at this level.
395+
if (minInterval != maxInterval) {
396+
break;
397+
}
// Dense and single-interval: extend the cached decision to the end of this level.
398+
upToInclusive = skipper.maxDocID(level);
399+
upToSameInterval = true;
400+
upToIntervalOrd = minInterval;
401+
}
402+
}
403+
278404
// Returns the value of the interval that v belongs to, or lastIntervalSeen;
279405
// if no processing is done, it returns the lastIntervalSeen
280406
private int processValue(long v) {

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.lucene.facet.MultiLongValues;
2323
import org.apache.lucene.facet.MultiLongValuesSource;
2424
import org.apache.lucene.facet.range.LongRange;
25+
import org.apache.lucene.index.DocValuesSkipper;
2526
import org.apache.lucene.index.LeafReaderContext;
2627
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
2728
import org.apache.lucene.search.LongValues;
@@ -32,8 +33,9 @@ class NonOverlappingLongRangeFacetCutter extends LongRangeFacetCutter {
3233
NonOverlappingLongRangeFacetCutter(
3334
MultiLongValuesSource longValuesSource,
3435
LongValuesSource singleLongValuesSource,
35-
LongRange[] longRanges) {
36-
super(longValuesSource, singleLongValuesSource, longRanges);
36+
LongRange[] longRanges,
37+
String skipField) {
38+
super(longValuesSource, singleLongValuesSource, longRanges, skipField);
3739
}
3840

3941
/**
@@ -68,6 +70,13 @@ List<InclusiveRange> buildElementaryIntervals() {
6870

6971
@Override
7072
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
73+
// Use the skip index when we can, otherwise fall back to the value source.
74+
DocValuesSkipper skipper = maybeSkipper(context);
75+
if (skipper != null) {
76+
LongValues values = skipFieldValues(context);
77+
return new NonOverlappingLongRangeSingleValueLeafFacetCutter(
78+
values, boundaries, pos, skipper);
79+
}
7180
if (singleValues != null) {
7281
LongValues values = singleValues.getValues(context, null);
7382
return new NonOverlappingLongRangeSingleValueLeafFacetCutter(values, boundaries, pos);
@@ -112,6 +121,11 @@ static class NonOverlappingLongRangeSingleValueLeafFacetCutter
112121
super(longValues, boundaries, pos);
113122
}
114123

124+
/**
* Creates a single-valued leaf cutter that passes {@code skipper} through to the superclass
* constructor.
*/
NonOverlappingLongRangeSingleValueLeafFacetCutter(
125+
LongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) {
126+
super(longValues, boundaries, pos, skipper);
127+
}
128+
115129
@Override
116130
public int nextOrd() throws IOException {
117131
if (elementaryIntervalOrd == NO_MORE_ORDS) {

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.lucene.facet.MultiLongValues;
2626
import org.apache.lucene.facet.MultiLongValuesSource;
2727
import org.apache.lucene.facet.range.LongRange;
28+
import org.apache.lucene.index.DocValuesSkipper;
2829
import org.apache.lucene.index.LeafReaderContext;
2930
import org.apache.lucene.internal.hppc.IntCursor;
3031
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
@@ -43,8 +44,9 @@ class OverlappingLongRangeFacetCutter extends LongRangeFacetCutter {
4344
OverlappingLongRangeFacetCutter(
4445
MultiLongValuesSource longValuesSource,
4546
LongValuesSource singleLongValuesSource,
46-
LongRange[] longRanges) {
47-
super(longValuesSource, singleLongValuesSource, longRanges);
47+
LongRange[] longRanges,
48+
String skipField) {
49+
super(longValuesSource, singleLongValuesSource, longRanges, skipField);
4850

4951
// Build binary tree on top of intervals:
5052
root = split(0, elementaryIntervals.size(), elementaryIntervals);
@@ -147,6 +149,13 @@ private static LongRangeNode split(int start, int end, List<InclusiveRange> elem
147149

148150
@Override
149151
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
152+
// Use the skip index when we can, otherwise fall back to the value source.
153+
DocValuesSkipper skipper = maybeSkipper(context);
154+
if (skipper != null) {
155+
LongValues values = skipFieldValues(context);
156+
return new OverlappingSingleValuedRangeLeafFacetCutter(
157+
values, boundaries, pos, requestedRangeCount, root, skipper);
158+
}
150159
if (singleValues != null) {
151160
LongValues values = singleValues.getValues(context, null);
152161
return new OverlappingSingleValuedRangeLeafFacetCutter(
@@ -233,6 +242,18 @@ static class OverlappingSingleValuedRangeLeafFacetCutter
233242
this.elementaryIntervalRoot = elementaryIntervalRoot;
234243
}
235244

245+
/**
* Creates a single-valued leaf cutter for overlapping ranges that passes {@code skipper} through
* to the superclass constructor.
*
* @param requestedRangeCount size passed to the {@code MultiIntervalTracker} created here
* @param elementaryIntervalRoot stored as-is in the {@code elementaryIntervalRoot} field
* @param skipper passed to the superclass constructor
*/
OverlappingSingleValuedRangeLeafFacetCutter(
246+
LongValues longValues,
247+
long[] boundaries,
248+
int[] pos,
249+
int requestedRangeCount,
250+
LongRangeNode elementaryIntervalRoot,
251+
DocValuesSkipper skipper) {
252+
super(longValues, boundaries, pos, skipper);
253+
requestedIntervalTracker = new IntervalTracker.MultiIntervalTracker(requestedRangeCount);
254+
this.elementaryIntervalRoot = elementaryIntervalRoot;
255+
}
256+
236257
@Override
237258
void maybeRollUp(IntervalTracker rollUpInto) {
238259
// TODO: for single valued we can rollup after collecting all documents, e.g. in reduce

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ private RangeFacetBuilderFactory() {}
3535

3636
/** Request long range facets for numeric field by name. */
3737
public static CommonFacetBuilder forLongRanges(String field, LongRange... ranges) {
38-
return forLongRanges(field, MultiLongValuesSource.fromLongField(field), ranges);
38+
// Pass the field by name so we can use its skip index when present.
39+
return new CommonFacetBuilder(
40+
field, LongRangeFacetCutter.create(field, ranges), new RangeOrdToLabel(ranges))
41+
.withSortByOrdinal();
3942
}
4043

4144
/**

0 commit comments

Comments
 (0)