Skip to content

Commit 625df71

Browse files
committed
GITHUB#15740: Add FieldMinMax unified numeric min/max API
1 parent 86b338d commit 625df71

3 files changed

Lines changed: 304 additions & 0 deletions

File tree

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ API Changes
6060

6161
* GITHUB#15480: Deprecate SortField#setMissingValue and add migration test toward immutability. (Syed Mohammad Saad)
6262

63+
* GITHUB#15740: Add FieldMinMax utility to retrieve global min/max values of a numeric field across an IndexReader, unifying PointValues and DocValuesSkipper semantics and returning null when no values exist. (Syed Mohammad Saad)
64+
6365
New Features
6466
---------------------
6567

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.index;
18+
19+
import java.io.IOException;
20+
import org.apache.lucene.util.NumericUtils;
21+
22+
/**
23+
* Utility to retrieve global min/max values of a numeric field across an IndexReader.
24+
*
25+
* <p>This method abstracts over different storage implementations used by Lucene:
26+
*
27+
* <ul>
28+
* <li>BKD PointValues (IntPoint / LongPoint)
29+
* <li>DocValuesSkipper (fast metadata when available)
30+
* <li>NumericDocValues scan (correct fallback)
31+
* </ul>
32+
*
33+
* <p>Only single dimensional integral numeric fields are supported.
34+
*
35+
* <p>Returns {@code null} when:
36+
*
37+
* <ul>
38+
* <li>field does not exist
39+
* <li>field is floating point (float/double)
40+
* <li>field is multi-dimensional
41+
* <li>no segments contain values
42+
* </ul>
43+
*/
44+
public final class FieldMinMax {
45+
46+
private FieldMinMax() {}
47+
48+
/** Immutable min/max holder */
49+
public static final class MinMax {
50+
public final long min;
51+
public final long max;
52+
53+
public MinMax(long min, long max) {
54+
this.min = min;
55+
this.max = max;
56+
}
57+
}
58+
59+
/** Returns global min/max or null if unavailable */
60+
public static MinMax get(IndexReader reader, String field) throws IOException {
61+
62+
// ---- 1. Prefer PointValues (accurate index statistics) ----
63+
boolean found = false;
64+
long globalMin = Long.MAX_VALUE;
65+
long globalMax = Long.MIN_VALUE;
66+
67+
for (LeafReaderContext ctx : reader.leaves()) {
68+
LeafReader leaf = ctx.reader();
69+
70+
PointValues values = leaf.getPointValues(field);
71+
if (values == null || values.getNumDimensions() != 1) {
72+
continue;
73+
}
74+
75+
byte[] minPacked = values.getMinPackedValue();
76+
byte[] maxPacked = values.getMaxPackedValue();
77+
if (minPacked == null || maxPacked == null) {
78+
continue;
79+
}
80+
81+
int bytes = values.getBytesPerDimension();
82+
Long min = decodeIntegral(minPacked, bytes);
83+
Long max = decodeIntegral(maxPacked, bytes);
84+
85+
if (min != null && max != null) {
86+
found = true;
87+
globalMin = Math.min(globalMin, min);
88+
globalMax = Math.max(globalMax, max);
89+
}
90+
}
91+
92+
if (found) {
93+
return new MinMax(globalMin, globalMax);
94+
}
95+
96+
// ---- 2. Try DocValuesSkipper (fast metadata) ----
97+
long sMin = DocValuesSkipper.globalMinValue(reader, field);
98+
long sMax = DocValuesSkipper.globalMaxValue(reader, field);
99+
100+
if (isValidSkipperRange(sMin, sMax)) {
101+
return new MinMax(sMin, sMax);
102+
}
103+
104+
// ---- 3. Guaranteed fallback: scan NumericDocValues ----
105+
return scanNumericDocValues(reader, field);
106+
}
107+
108+
/** Decode integral numeric point values only */
109+
private static Long decodeIntegral(byte[] packed, int bytesPerDim) {
110+
switch (bytesPerDim) {
111+
case Integer.BYTES:
112+
return (long) NumericUtils.sortableBytesToInt(packed, 0);
113+
case Long.BYTES:
114+
return NumericUtils.sortableBytesToLong(packed, 0);
115+
default:
116+
return null; // float/double unsupported
117+
}
118+
}
119+
120+
/** Validate skipper sentinel semantics */
121+
private static boolean isValidSkipperRange(long min, long max) {
122+
if (min == Long.MAX_VALUE && max == Long.MIN_VALUE) return false;
123+
if (min == Long.MIN_VALUE && max == Long.MAX_VALUE) return false;
124+
return true;
125+
}
126+
127+
/** Full scan fallback for NumericDocValues */
128+
private static MinMax scanNumericDocValues(IndexReader reader, String field) throws IOException {
129+
boolean found = false;
130+
long min = Long.MAX_VALUE;
131+
long max = Long.MIN_VALUE;
132+
133+
for (LeafReaderContext ctx : reader.leaves()) {
134+
LeafReader leaf = ctx.reader();
135+
NumericDocValues values = leaf.getNumericDocValues(field);
136+
if (values == null) continue;
137+
138+
while (values.nextDoc() != NumericDocValues.NO_MORE_DOCS) {
139+
long v = values.longValue();
140+
found = true;
141+
min = Math.min(min, v);
142+
max = Math.max(max, v);
143+
}
144+
}
145+
146+
return found ? new MinMax(min, max) : null;
147+
}
148+
}
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.index;
18+
19+
import org.apache.lucene.document.Document;
20+
import org.apache.lucene.document.IntPoint;
21+
import org.apache.lucene.document.NumericDocValuesField;
22+
import org.apache.lucene.store.Directory;
23+
import org.apache.lucene.tests.index.RandomIndexWriter;
24+
import org.apache.lucene.tests.util.LuceneTestCase;
25+
26+
public class TestFieldMinMax extends LuceneTestCase {
27+
28+
public void testMissingField() throws Exception {
29+
Directory dir = newDirectory();
30+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
31+
32+
w.addDocument(new Document());
33+
w.close();
34+
35+
DirectoryReader reader = DirectoryReader.open(dir);
36+
37+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "age");
38+
assertNull(mm);
39+
40+
reader.close();
41+
dir.close();
42+
}
43+
44+
public void testIntPointMinMax() throws Exception {
45+
Directory dir = newDirectory();
46+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
47+
48+
Document d1 = new Document();
49+
d1.add(new IntPoint("age", 10));
50+
w.addDocument(d1);
51+
52+
Document d2 = new Document();
53+
d2.add(new IntPoint("age", 50));
54+
w.addDocument(d2);
55+
56+
Document d3 = new Document();
57+
d3.add(new IntPoint("age", 30));
58+
w.addDocument(d3);
59+
60+
w.close();
61+
62+
DirectoryReader reader = DirectoryReader.open(dir);
63+
64+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "age");
65+
assertNotNull(mm);
66+
assertEquals(10, mm.min);
67+
assertEquals(50, mm.max);
68+
69+
reader.close();
70+
dir.close();
71+
}
72+
73+
public void testDocValuesMinMax() throws Exception {
74+
Directory dir = newDirectory();
75+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
76+
77+
Document d1 = new Document();
78+
d1.add(new NumericDocValuesField("score", 5));
79+
w.addDocument(d1);
80+
81+
Document d2 = new Document();
82+
d2.add(new NumericDocValuesField("score", 100));
83+
w.addDocument(d2);
84+
85+
Document d3 = new Document();
86+
d3.add(new NumericDocValuesField("score", 42));
87+
w.addDocument(d3);
88+
89+
w.commit();
90+
w.forceMerge(1); // ensures skipper metadata exists
91+
92+
w.close();
93+
94+
DirectoryReader reader = DirectoryReader.open(dir);
95+
96+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "score");
97+
assertNotNull(mm);
98+
assertEquals(5, mm.min);
99+
assertEquals(100, mm.max);
100+
101+
reader.close();
102+
dir.close();
103+
}
104+
105+
public void testMixedSegments() throws Exception {
106+
Directory dir = newDirectory();
107+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
108+
109+
Document d1 = new Document();
110+
d1.add(new IntPoint("age", 7));
111+
w.addDocument(d1);
112+
113+
w.commit(); // force new segment
114+
115+
Document d2 = new Document();
116+
d2.add(new IntPoint("age", 70));
117+
w.addDocument(d2);
118+
119+
w.close();
120+
121+
DirectoryReader reader = DirectoryReader.open(dir);
122+
123+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "age");
124+
assertNotNull(mm);
125+
assertEquals(7, mm.min);
126+
assertEquals(70, mm.max);
127+
128+
reader.close();
129+
dir.close();
130+
}
131+
132+
public void testEmptySegmentIgnored() throws Exception {
133+
Directory dir = newDirectory();
134+
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
135+
136+
w.addDocument(new Document()); // empty doc
137+
138+
Document d = new Document();
139+
d.add(new IntPoint("age", 25));
140+
w.addDocument(d);
141+
142+
w.close();
143+
144+
DirectoryReader reader = DirectoryReader.open(dir);
145+
146+
FieldMinMax.MinMax mm = FieldMinMax.get(reader, "age");
147+
assertNotNull(mm);
148+
assertEquals(25, mm.min);
149+
assertEquals(25, mm.max);
150+
151+
reader.close();
152+
dir.close();
153+
}
154+
}

0 commit comments

Comments
 (0)