Skip to content

Commit 7db2833

Browse files
committed
Extend the skip-index fast path to non-dense blocks and reuse the interval tracker
1 parent c39f9ca commit 7db2833

7 files changed

Lines changed: 90 additions & 50 deletions

File tree

lucene/CHANGES.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ Improvements
291291

292292
Optimizations
293293
---------------------
294-
(No changes)
294+
* GITHUB#16268: Use the doc-values skip index to skip per-doc value lookups in LongRangeFacetCutter. (Jakub Slowinski)
295295

296296
Bug Fixes
297297
---------------------
@@ -568,8 +568,6 @@ Optimizations
568568

569569
* GITHUB#16228: Reuse scratch int[] for ordinal translation. (Tim Brooks)
570570

571-
* GITHUB#16268: Use the doc-values skip index to skip per-doc value lookups for dense blocks in LongRangeFacetCutter. (Jakub Slowinski)
572-
573571
Bug Fixes
574572
---------------------
575573
* GITHUB#15754: Fix HTMLStripCharFilter to prevent tags from incorrectly consuming subsequent

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ interface IntervalTracker extends OrdinalIterator {
3636
/** clear recorded information on this tracker. * */
3737
void clear();
3838

39+
/**
40+
* restart reading from the first recorded ordinal, to replay a {@link #freeze() frozen} tracker
41+
*/
42+
void rewind();
43+
3944
/** check if any data for the interval has been recorded * */
4045
boolean get(int index);
4146

@@ -71,6 +76,12 @@ public void clear() {
7176
intervalsWithHit = 0;
7277
}
7378

79+
@Override
80+
public void rewind() {
81+
bitFrom = 0;
82+
trackerState = 0;
83+
}
84+
7485
@Override
7586
public boolean get(int index) {
7687
return tracker.get(index);

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import org.apache.lucene.index.DocValuesSkipper;
2828
import org.apache.lucene.index.LeafReaderContext;
2929
import org.apache.lucene.index.NumericDocValues;
30-
import org.apache.lucene.index.SortedNumericDocValues;
3130
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
3231
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
3332
import org.apache.lucene.search.LongValues;
@@ -48,7 +47,7 @@ public abstract class LongRangeFacetCutter implements FacetCutter {
4847
// TODO: refactor - weird that we have both multi and single here.
4948
final LongValuesSource singleValues;
5049

51-
// Field to read a DocValuesSkipper from on the single-valued path, or null when disabled.
50+
// Field name whose skip index is used on the single-valued path, or null when faceting a source.
5251
final String skipField;
5352

5453
final LongRangeAndPos[] sortedRanges;
@@ -153,25 +152,18 @@ public static LongRangeFacetCutter create(String field, LongRange[] longRanges)
153152
abstract List<InclusiveRange> buildElementaryIntervals();
154153

155154
/**
156-
* Returns the {@link DocValuesSkipper} for {@link #skipField} in this segment. Null when: no skip
157-
* field is configured, the field has no skip index, or some doc in this segment has more than one
158-
* value.
155+
* Single-valued {@link LongValues} read directly from {@link #skipField} so its skip index can be
156+
* used, or null when there is no skip field or the segment is multi-valued.
159157
*/
160-
final DocValuesSkipper maybeSkipper(LeafReaderContext context) throws IOException {
158+
final LongValues singleValuedSkipField(LeafReaderContext context) throws IOException {
161159
if (skipField == null) {
162160
return null;
163161
}
164-
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), skipField);
165-
if (DocValues.unwrapSingleton(sortedNumeric) == null) {
166-
return null;
167-
}
168-
return context.reader().getDocValuesSkipper(skipField);
169-
}
170-
171-
/** Single-valued {@link LongValues} for {@link #skipField} in this segment. */
172-
final LongValues skipFieldValues(LeafReaderContext context) throws IOException {
173162
NumericDocValues values =
174163
DocValues.unwrapSingleton(DocValues.getSortedNumeric(context.reader(), skipField));
164+
if (values == null) {
165+
return null;
166+
}
175167
return new LongValues() {
176168
@Override
177169
public long longValue() throws IOException {
@@ -313,15 +305,20 @@ abstract static class LongRangeSingleValuedLeafFacetCutter implements LeafFacetC
313305

314306
IntervalTracker requestedIntervalTracker;
315307

316-
// Skip index for the faceted field, or null when disabled.
317308
private final DocValuesSkipper skipper;
318309

319-
// Cached decision from advanceSkipper, valid for every doc up to (and including) upToInclusive:
320-
// when upToSameInterval is true, all those docs map to elementary interval upToIntervalOrd.
310+
// advanceSkipper's decisions for the current block; the fields below hold while doc <=
311+
// upToInclusive, after which it runs again for the next block.
321312
private int upToInclusive = -1;
313+
// Whether every value in the block maps to the single interval upToIntervalOrd.
322314
private boolean upToSameInterval;
315+
// Whether every doc in the block has a value.
316+
private boolean upToDense;
323317
private int upToIntervalOrd;
324318

319+
// Interval of the previous doc with a value, for replaying the tracker on a repeat.
320+
private int previousIntervalOrd = -1;
321+
325322
LongRangeSingleValuedLeafFacetCutter(LongValues longValues, long[] boundaries, int[] pos) {
326323
this(longValues, boundaries, pos, null);
327324
}
@@ -342,28 +339,34 @@ public boolean advanceExact(int doc) throws IOException {
342339

343340
int intervalOrd;
344341
if (upToSameInterval) {
345-
// We are inside a dense skip block that maps entirely to one elementary interval, so reuse
346-
// the cached ordinal and skip the per-doc value lookup and binary search.
342+
// Reuse the cached ordinal, skipping the binary search. A dense block also skips the value
343+
// lookup, a sparse one still needs advanceExact to know whether this doc has a value.
344+
if (upToDense == false && longValues.advanceExact(doc) == false) {
345+
return false;
346+
}
347347
intervalOrd = upToIntervalOrd;
348348
} else if (longValues.advanceExact(doc)) {
349349
intervalOrd = processValue(longValues.longValue());
350350
} else {
351351
return false;
352352
}
353353

354-
if (requestedIntervalTracker != null) {
355-
requestedIntervalTracker.clear();
356-
}
357354
elementaryIntervalOrd = intervalOrd;
358-
maybeRollUp(requestedIntervalTracker);
359355
if (requestedIntervalTracker != null) {
360-
requestedIntervalTracker.freeze();
356+
if (skipper != null && intervalOrd == previousIntervalOrd) {
357+
// Same interval as the previous doc, so replay its frozen rollup instead of rebuilding.
358+
requestedIntervalTracker.rewind();
359+
} else {
360+
requestedIntervalTracker.clear();
361+
maybeRollUp(requestedIntervalTracker);
362+
requestedIntervalTracker.freeze();
363+
previousIntervalOrd = intervalOrd;
364+
}
361365
}
362366

363367
return true;
364368
}
365369

366-
/** Mirrors {@code HistogramCollector#advanceSkipper}. */
367370
private void advanceSkipper(int doc) throws IOException {
368371
if (doc > skipper.maxDocID(0)) {
369372
skipper.advance(doc);
@@ -378,14 +381,9 @@ private void advanceSkipper(int doc) throws IOException {
378381
}
379382

380383
upToInclusive = skipper.maxDocID(0);
381-
// Now find the highest level where all docs have a value and map to the same interval.
384+
// Climb to the highest level that still maps to a single interval.
382385
for (int level = 0; level < skipper.numLevels(); ++level) {
383-
int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1;
384-
if (skipper.docCount(level) != totalDocsAtLevel) {
385-
// Some docs at this level have no value, so we can't resolve the whole block at once.
386-
break;
387-
}
388-
// Long fields store raw values, the skipper's min/max map straight into the boundary space.
386+
// Long fields store raw values, skipper's min/max maps straight into the boundary space.
389387
int minInterval = processValue(skipper.minValue(level));
390388
int maxInterval = processValue(skipper.maxValue(level));
391389
if (minInterval != maxInterval) {
@@ -394,6 +392,8 @@ private void advanceSkipper(int doc) throws IOException {
394392
upToInclusive = skipper.maxDocID(level);
395393
upToSameInterval = true;
396394
upToIntervalOrd = minInterval;
395+
int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1;
396+
upToDense = skipper.docCount(level) == totalDocsAtLevel;
397397
}
398398
}
399399

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,11 @@ List<InclusiveRange> buildElementaryIntervals() {
7070

7171
@Override
7272
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
73-
// Use the skip index when we can, otherwise fall back to the value source.
74-
DocValuesSkipper skipper = maybeSkipper(context);
75-
if (skipper != null) {
76-
LongValues values = skipFieldValues(context);
73+
LongValues skipFieldValues = singleValuedSkipField(context);
74+
if (skipFieldValues != null) {
75+
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(skipField);
7776
return new NonOverlappingLongRangeSingleValueLeafFacetCutter(
78-
values, boundaries, pos, skipper);
77+
skipFieldValues, boundaries, pos, skipper);
7978
}
8079
if (singleValues != null) {
8180
LongValues values = singleValues.getValues(context, null);

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,11 @@ private static LongRangeNode split(int start, int end, List<InclusiveRange> elem
149149

150150
@Override
151151
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
152-
// Use the skip index when we can, otherwise fall back to the value source.
153-
DocValuesSkipper skipper = maybeSkipper(context);
154-
if (skipper != null) {
155-
LongValues values = skipFieldValues(context);
152+
LongValues skipFieldValues = singleValuedSkipField(context);
153+
if (skipFieldValues != null) {
154+
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(skipField);
156155
return new OverlappingSingleValuedRangeLeafFacetCutter(
157-
values, boundaries, pos, requestedRangeCount, root, skipper);
156+
skipFieldValues, boundaries, pos, requestedRangeCount, root, skipper);
158157
}
159158
if (singleValues != null) {
160159
LongValues values = singleValues.getValues(context, null);

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ private RangeFacetBuilderFactory() {}
3535

3636
/** Request long range facets for numeric field by name. */
3737
public static CommonFacetBuilder forLongRanges(String field, LongRange... ranges) {
38-
// Pass the field by name so we can use its skip index when present.
3938
return new CommonFacetBuilder(
4039
field, LongRangeFacetCutter.create(field, ranges), new RangeOrdToLabel(ranges))
4140
.withSortByOrdinal();

lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -944,7 +944,6 @@ public void testSkipIndexEquivalenceExtremeValues() throws Exception {
944944
default -> TestUtil.nextLong(random(), -100, 100);
945945
};
946946
doc.add(NumericDocValuesField.indexedField("field", v));
947-
doc.add(new LongPoint("point", v));
948947
w.addDocument(doc);
949948
}
950949

@@ -998,8 +997,43 @@ public void testSkipIndexEquivalenceMultiValued() throws Exception {
998997
IOUtils.close(dir);
999998
}
1000999

1001-
// Asserts faceting "field" by name (skip index when available) matches faceting via a
1002-
// MultiLongValuesSource (no skip index), over random range sets including extreme bounds.
1000+
public void testSkipIndexEquivalenceFewValues() throws Exception {
1001+
Directory dir = newDirectory();
1002+
IndexWriterConfig iwc = newIndexWriterConfig();
1003+
iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG, false)));
1004+
iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4)));
1005+
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
1006+
1007+
int numDocs = atLeast(1000);
1008+
for (int i = 0; i < numDocs; i++) {
1009+
Document doc = new Document();
1010+
doc.add(NumericDocValuesField.indexedField("field", TestUtil.nextLong(random(), 0, 5)));
1011+
w.addDocument(doc);
1012+
}
1013+
1014+
assertSkipIndexEquivalence(w, "few-values");
1015+
1016+
w.close();
1017+
IOUtils.close(dir);
1018+
}
1019+
1020+
public void testSingleValuedNoSkipIndex() throws Exception {
1021+
Directory dir = newDirectory();
1022+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
1023+
1024+
int numDocs = atLeast(1000);
1025+
for (int i = 0; i < numDocs; i++) {
1026+
Document doc = new Document();
1027+
doc.add(new NumericDocValuesField("field", TestUtil.nextLong(random(), -100, 100)));
1028+
w.addDocument(doc);
1029+
}
1030+
1031+
assertSkipIndexEquivalence(w, "single-valued-no-skip");
1032+
1033+
w.close();
1034+
IOUtils.close(dir);
1035+
}
1036+
10031037
private void assertSkipIndexEquivalence(RandomIndexWriter w, String desc) throws IOException {
10041038
IndexReader r = w.getReader();
10051039
try {

0 commit comments

Comments
 (0)