Skip to content

Commit ac19825

Browse files
committed
GITHUB#15740: Add FieldMinMax unified numeric min/max API
1 parent 86b338d commit ac19825

3 files changed

Lines changed: 314 additions & 0 deletions

File tree

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ API Changes
6060

6161
* GITHUB#15480: Deprecate SortField#setMissingValue and add migration test toward immutability. (Syed Mohammad Saad)
6262

63+
* GITHUB#15740: Add FieldMinMax utility to retrieve global min/max values of a numeric field across an IndexReader, unifying PointValues and DocValuesSkipper semantics and returning null when no values exist. (Syed Mohammad Saad)
64+
6365
New Features
6466
---------------------
6567

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.index;
18+
19+
import java.io.IOException;
20+
import org.apache.lucene.util.NumericUtils;
21+
22+
/**
23+
* Utility to retrieve global min/max values of a numeric field across an IndexReader.
24+
*
25+
* <p>This method abstracts over different storage implementations used by Lucene:
26+
*
27+
* <ul>
28+
* <li>BKD PointValues (IntPoint / LongPoint)
29+
* <li>DocValuesSkipper (fast metadata when available)
30+
* <li>NumericDocValues scan (correct fallback)
31+
* </ul>
32+
*
33+
* <p>Only single dimensional integral numeric fields are supported.
34+
*
35+
* <p>Returns {@code null} when:
36+
*
37+
* <ul>
38+
* <li>field does not exist
39+
* <li>field is floating point (float/double)
40+
* <li>field is multi-dimensional
41+
* <li>no segments contain values
42+
* </ul>
43+
*/
44+
public final class FieldMinMax {
45+
46+
private FieldMinMax() {}
47+
48+
/** Immutable holder for global minimum and maximum values. */
49+
public static final class MinMax {
50+
51+
/** The minimum value across all documents. */
52+
public final long min;
53+
54+
/** The maximum value across all documents. */
55+
public final long max;
56+
57+
/**
58+
* Creates a new {@link MinMax} instance.
59+
*
60+
* @param min the minimum value
61+
* @param max the maximum value
62+
*/
63+
public MinMax(long min, long max) {
64+
this.min = min;
65+
this.max = max;
66+
}
67+
}
68+
69+
/** Returns global min/max or null if unavailable */
70+
public static MinMax get(IndexReader reader, String field) throws IOException {
71+
72+
// ---- 1. Prefer PointValues (accurate index statistics) ----
73+
boolean found = false;
74+
long globalMin = Long.MAX_VALUE;
75+
long globalMax = Long.MIN_VALUE;
76+
77+
for (LeafReaderContext ctx : reader.leaves()) {
78+
LeafReader leaf = ctx.reader();
79+
80+
PointValues values = leaf.getPointValues(field);
81+
if (values == null || values.getNumDimensions() != 1) {
82+
continue;
83+
}
84+
85+
byte[] minPacked = values.getMinPackedValue();
86+
byte[] maxPacked = values.getMaxPackedValue();
87+
if (minPacked == null || maxPacked == null) {
88+
continue;
89+
}
90+
91+
int bytes = values.getBytesPerDimension();
92+
Long min = decodeIntegral(minPacked, bytes);
93+
Long max = decodeIntegral(maxPacked, bytes);
94+
95+
if (min != null && max != null) {
96+
found = true;
97+
globalMin = Math.min(globalMin, min);
98+
globalMax = Math.max(globalMax, max);
99+
}
100+
}
101+
102+
if (found) {
103+
return new MinMax(globalMin, globalMax);
104+
}
105+
106+
// ---- 2. Try DocValuesSkipper (fast metadata) ----
107+
long sMin = DocValuesSkipper.globalMinValue(reader, field);
108+
long sMax = DocValuesSkipper.globalMaxValue(reader, field);
109+
110+
if (isValidSkipperRange(sMin, sMax)) {
111+
return new MinMax(sMin, sMax);
112+
}
113+
114+
// ---- 3. Guaranteed fallback: scan NumericDocValues ----
115+
return scanNumericDocValues(reader, field);
116+
}
117+
118+
/** Decode integral numeric point values only */
119+
private static Long decodeIntegral(byte[] packed, int bytesPerDim) {
120+
switch (bytesPerDim) {
121+
case Integer.BYTES:
122+
return (long) NumericUtils.sortableBytesToInt(packed, 0);
123+
case Long.BYTES:
124+
return NumericUtils.sortableBytesToLong(packed, 0);
125+
default:
126+
return null; // float/double unsupported
127+
}
128+
}
129+
130+
/** Validate skipper sentinel semantics */
131+
private static boolean isValidSkipperRange(long min, long max) {
132+
if (min == Long.MAX_VALUE && max == Long.MIN_VALUE) return false;
133+
if (min == Long.MIN_VALUE && max == Long.MAX_VALUE) return false;
134+
return true;
135+
}
136+
137+
/** Full scan fallback for NumericDocValues */
138+
private static MinMax scanNumericDocValues(IndexReader reader, String field) throws IOException {
139+
boolean found = false;
140+
long min = Long.MAX_VALUE;
141+
long max = Long.MIN_VALUE;
142+
143+
for (LeafReaderContext ctx : reader.leaves()) {
144+
LeafReader leaf = ctx.reader();
145+
NumericDocValues values = leaf.getNumericDocValues(field);
146+
if (values == null) continue;
147+
148+
while (values.nextDoc() != NumericDocValues.NO_MORE_DOCS) {
149+
long v = values.longValue();
150+
found = true;
151+
min = Math.min(min, v);
152+
max = Math.max(max, v);
153+
}
154+
}
155+
156+
return found ? new MinMax(min, max) : null;
157+
}
158+
}
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.index;
18+
19+
import org.apache.lucene.document.Document;
20+
import org.apache.lucene.document.IntPoint;
21+
import org.apache.lucene.document.NumericDocValuesField;
22+
import org.apache.lucene.store.Directory;
23+
import org.apache.lucene.tests.index.RandomIndexWriter;
24+
import org.apache.lucene.tests.util.LuceneTestCase;
25+
26+
public class TestFieldMinMax extends LuceneTestCase {
27+
28+
public void testMissingField() throws Exception {
29+
Directory dir = newDirectory();
30+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
31+
32+
w.addDocument(new Document());
33+
w.close();
34+
35+
DirectoryReader reader = DirectoryReader.open(dir);
36+
37+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "age");
38+
assertNull(mm);
39+
40+
reader.close();
41+
dir.close();
42+
}
43+
44+
public void testIntPointMinMax() throws Exception {
45+
Directory dir = newDirectory();
46+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
47+
48+
Document d1 = new Document();
49+
d1.add(new IntPoint("age", 10));
50+
w.addDocument(d1);
51+
52+
Document d2 = new Document();
53+
d2.add(new IntPoint("age", 50));
54+
w.addDocument(d2);
55+
56+
Document d3 = new Document();
57+
d3.add(new IntPoint("age", 30));
58+
w.addDocument(d3);
59+
60+
w.close();
61+
62+
DirectoryReader reader = DirectoryReader.open(dir);
63+
64+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "age");
65+
assertNotNull(mm);
66+
assertEquals(10, mm.min);
67+
assertEquals(50, mm.max);
68+
69+
reader.close();
70+
dir.close();
71+
}
72+
73+
public void testDocValuesMinMax() throws Exception {
74+
Directory dir = newDirectory();
75+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
76+
77+
Document d1 = new Document();
78+
d1.add(new NumericDocValuesField("score", 5));
79+
w.addDocument(d1);
80+
81+
Document d2 = new Document();
82+
d2.add(new NumericDocValuesField("score", 100));
83+
w.addDocument(d2);
84+
85+
Document d3 = new Document();
86+
d3.add(new NumericDocValuesField("score", 42));
87+
w.addDocument(d3);
88+
89+
w.commit();
90+
w.forceMerge(1); // ensures skipper metadata exists
91+
92+
w.close();
93+
94+
DirectoryReader reader = DirectoryReader.open(dir);
95+
96+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "score");
97+
assertNotNull(mm);
98+
assertEquals(5, mm.min);
99+
assertEquals(100, mm.max);
100+
101+
reader.close();
102+
dir.close();
103+
}
104+
105+
public void testMixedSegments() throws Exception {
106+
Directory dir = newDirectory();
107+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
108+
109+
Document d1 = new Document();
110+
d1.add(new IntPoint("age", 7));
111+
w.addDocument(d1);
112+
113+
w.commit(); // force new segment
114+
115+
Document d2 = new Document();
116+
d2.add(new IntPoint("age", 70));
117+
w.addDocument(d2);
118+
119+
w.close();
120+
121+
DirectoryReader reader = DirectoryReader.open(dir);
122+
123+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "age");
124+
assertNotNull(mm);
125+
assertEquals(7, mm.min);
126+
assertEquals(70, mm.max);
127+
128+
reader.close();
129+
dir.close();
130+
}
131+
132+
public void testEmptySegmentIgnored() throws Exception {
133+
Directory dir = newDirectory();
134+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
135+
136+
w.addDocument(new Document()); // empty doc
137+
138+
Document d = new Document();
139+
d.add(new IntPoint("age", 25));
140+
w.addDocument(d);
141+
142+
w.close();
143+
144+
DirectoryReader reader = DirectoryReader.open(dir);
145+
146+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "age");
147+
assertNotNull(mm);
148+
assertEquals(25, mm.min);
149+
assertEquals(25, mm.max);
150+
151+
reader.close();
152+
dir.close();
153+
}
154+
}

0 commit comments

Comments
 (0)