|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | + * contributor license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright ownership. |
| 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | + * (the "License"); you may not use this file except in compliance with |
| 7 | + * the License. You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | +package org.apache.lucene.index; |
| 18 | + |
| 19 | +import java.io.IOException; |
| 20 | +import org.apache.lucene.util.NumericUtils; |
| 21 | + |
| 22 | +/** |
| 23 | + * Utility to retrieve global min/max values of a numeric field across an IndexReader. |
| 24 | + * |
| 25 | + * <p>This method abstracts over different storage implementations used by Lucene: |
| 26 | + * |
| 27 | + * <ul> |
| 28 | + * <li>BKD PointValues (IntPoint / LongPoint) |
| 29 | + * <li>DocValuesSkipper (fast metadata when available) |
| 30 | + * <li>NumericDocValues scan (correct fallback) |
| 31 | + * </ul> |
| 32 | + * |
| 33 | + * <p>Only single dimensional integral numeric fields are supported. |
| 34 | + * |
| 35 | + * <p>Returns {@code null} when: |
| 36 | + * |
| 37 | + * <ul> |
| 38 | + * <li>field does not exist |
| 39 | + * <li>field is floating point (float/double) |
| 40 | + * <li>field is multi-dimensional |
| 41 | + * <li>no segments contain values |
| 42 | + * </ul> |
| 43 | + */ |
| 44 | +public final class FieldMinMax { |
| 45 | + |
| 46 | + private FieldMinMax() {} |
| 47 | + |
| 48 | + /** Immutable holder for global minimum and maximum values. */ |
| 49 | + public static final class MinMax { |
| 50 | + |
| 51 | + /** The minimum value across all documents. */ |
| 52 | + public final long min; |
| 53 | + |
| 54 | + /** The maximum value across all documents. */ |
| 55 | + public final long max; |
| 56 | + |
| 57 | + /** |
| 58 | + * Creates a new {@link MinMax} instance. |
| 59 | + * |
| 60 | + * @param min the minimum value |
| 61 | + * @param max the maximum value |
| 62 | + */ |
| 63 | + public MinMax(long min, long max) { |
| 64 | + this.min = min; |
| 65 | + this.max = max; |
| 66 | + } |
| 67 | + } |
| 68 | + |
| 69 | + /** Returns global min/max or null if unavailable */ |
| 70 | + public static MinMax get(IndexReader reader, String field) throws IOException { |
| 71 | + |
| 72 | + // ---- 1. Prefer PointValues (accurate index statistics) ---- |
| 73 | + boolean found = false; |
| 74 | + long globalMin = Long.MAX_VALUE; |
| 75 | + long globalMax = Long.MIN_VALUE; |
| 76 | + |
| 77 | + for (LeafReaderContext ctx : reader.leaves()) { |
| 78 | + LeafReader leaf = ctx.reader(); |
| 79 | + |
| 80 | + PointValues values = leaf.getPointValues(field); |
| 81 | + if (values == null || values.getNumDimensions() != 1) { |
| 82 | + continue; |
| 83 | + } |
| 84 | + |
| 85 | + byte[] minPacked = values.getMinPackedValue(); |
| 86 | + byte[] maxPacked = values.getMaxPackedValue(); |
| 87 | + if (minPacked == null || maxPacked == null) { |
| 88 | + continue; |
| 89 | + } |
| 90 | + |
| 91 | + int bytes = values.getBytesPerDimension(); |
| 92 | + Long min = decodeIntegral(minPacked, bytes); |
| 93 | + Long max = decodeIntegral(maxPacked, bytes); |
| 94 | + |
| 95 | + if (min != null && max != null) { |
| 96 | + found = true; |
| 97 | + globalMin = Math.min(globalMin, min); |
| 98 | + globalMax = Math.max(globalMax, max); |
| 99 | + } |
| 100 | + } |
| 101 | + |
| 102 | + if (found) { |
| 103 | + return new MinMax(globalMin, globalMax); |
| 104 | + } |
| 105 | + |
| 106 | + // ---- 2. Try DocValuesSkipper (fast metadata) ---- |
| 107 | + long sMin = DocValuesSkipper.globalMinValue(reader, field); |
| 108 | + long sMax = DocValuesSkipper.globalMaxValue(reader, field); |
| 109 | + |
| 110 | + if (isValidSkipperRange(sMin, sMax)) { |
| 111 | + return new MinMax(sMin, sMax); |
| 112 | + } |
| 113 | + |
| 114 | + // ---- 3. Guaranteed fallback: scan NumericDocValues ---- |
| 115 | + return scanNumericDocValues(reader, field); |
| 116 | + } |
| 117 | + |
| 118 | + /** Decode integral numeric point values only */ |
| 119 | + private static Long decodeIntegral(byte[] packed, int bytesPerDim) { |
| 120 | + switch (bytesPerDim) { |
| 121 | + case Integer.BYTES: |
| 122 | + return (long) NumericUtils.sortableBytesToInt(packed, 0); |
| 123 | + case Long.BYTES: |
| 124 | + return NumericUtils.sortableBytesToLong(packed, 0); |
| 125 | + default: |
| 126 | + return null; // float/double unsupported |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + /** Validate skipper sentinel semantics */ |
| 131 | + private static boolean isValidSkipperRange(long min, long max) { |
| 132 | + if (min == Long.MAX_VALUE && max == Long.MIN_VALUE) return false; |
| 133 | + if (min == Long.MIN_VALUE && max == Long.MAX_VALUE) return false; |
| 134 | + return true; |
| 135 | + } |
| 136 | + |
| 137 | + /** Full scan fallback for NumericDocValues */ |
| 138 | + private static MinMax scanNumericDocValues(IndexReader reader, String field) throws IOException { |
| 139 | + boolean found = false; |
| 140 | + long min = Long.MAX_VALUE; |
| 141 | + long max = Long.MIN_VALUE; |
| 142 | + |
| 143 | + for (LeafReaderContext ctx : reader.leaves()) { |
| 144 | + LeafReader leaf = ctx.reader(); |
| 145 | + NumericDocValues values = leaf.getNumericDocValues(field); |
| 146 | + if (values == null) continue; |
| 147 | + |
| 148 | + while (values.nextDoc() != NumericDocValues.NO_MORE_DOCS) { |
| 149 | + long v = values.longValue(); |
| 150 | + found = true; |
| 151 | + min = Math.min(min, v); |
| 152 | + max = Math.max(max, v); |
| 153 | + } |
| 154 | + } |
| 155 | + |
| 156 | + return found ? new MinMax(min, max) : null; |
| 157 | + } |
| 158 | +} |
0 commit comments