Skip to content

Commit 89b286e

Browse files
Optimize LongValueFacetCutter with leaf-local ordinal cache
1 parent e26a959 commit 89b286e

2 files changed

Lines changed: 163 additions & 1 deletion

File tree

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.benchmark.jmh;
18+
19+
import java.io.IOException;
20+
import java.nio.file.Files;
21+
import java.nio.file.Path;
22+
import java.util.Comparator;
23+
import java.util.Random;
24+
import java.util.concurrent.TimeUnit;
25+
import java.util.stream.Stream;
26+
import org.apache.lucene.document.Document;
27+
import org.apache.lucene.document.LongPoint;
28+
import org.apache.lucene.document.NumericDocValuesField;
29+
import org.apache.lucene.document.SortedNumericDocValuesField;
30+
import org.apache.lucene.index.DirectoryReader;
31+
import org.apache.lucene.index.IndexReader;
32+
import org.apache.lucene.index.IndexWriter;
33+
import org.apache.lucene.index.IndexWriterConfig;
34+
import org.apache.lucene.sandbox.facet.FacetFieldCollectorManager;
35+
import org.apache.lucene.sandbox.facet.cutters.LongValueFacetCutter;
36+
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
37+
import org.apache.lucene.search.IndexSearcher;
38+
import org.apache.lucene.search.MatchAllDocsQuery;
39+
import org.apache.lucene.store.Directory;
40+
import org.apache.lucene.store.MMapDirectory;
41+
import org.openjdk.jmh.annotations.Benchmark;
42+
import org.openjdk.jmh.annotations.BenchmarkMode;
43+
import org.openjdk.jmh.annotations.Fork;
44+
import org.openjdk.jmh.annotations.Level;
45+
import org.openjdk.jmh.annotations.Measurement;
46+
import org.openjdk.jmh.annotations.Mode;
47+
import org.openjdk.jmh.annotations.OutputTimeUnit;
48+
import org.openjdk.jmh.annotations.Param;
49+
import org.openjdk.jmh.annotations.Scope;
50+
import org.openjdk.jmh.annotations.Setup;
51+
import org.openjdk.jmh.annotations.State;
52+
import org.openjdk.jmh.annotations.TearDown;
53+
import org.openjdk.jmh.annotations.Warmup;
54+
55+
/**
56+
* JMH benchmark for {@link LongValueFacetCutter} throughput.
57+
*
58+
*/
59+
@State(Scope.Thread)
60+
@BenchmarkMode(Mode.Throughput)
61+
@OutputTimeUnit(TimeUnit.SECONDS)
62+
@Fork(value = 1, warmups = 1)
63+
@Warmup(iterations = 2, time = 2)
64+
@Measurement(iterations = 5, time = 3)
65+
public class LongValueFacetCutterBenchmark {
66+
Directory dir;
67+
IndexReader reader;
68+
IndexSearcher searcher;
69+
Path path;
70+
71+
@Setup(Level.Trial)
72+
public void setup(BenchmarkParams params) throws Exception {
73+
path = Files.createTempDirectory("longValueFacetCutter");
74+
dir = MMapDirectory.open(path);
75+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig());
76+
Random r = new Random(42);
77+
78+
for (int i = 0; i < params.docCount; i++) {
79+
Document doc = new Document();
80+
// Indexed point for range query filtering
81+
doc.add(new LongPoint("id", i));
82+
if (params.multiValued) {
83+
int numValues = r.nextInt(1, 4);
84+
for (int v = 0; v < numValues; v++) {
85+
doc.add(new SortedNumericDocValuesField("f", r.nextInt(0, params.cardinality)));
86+
}
87+
} else {
88+
doc.add(new NumericDocValuesField("f", r.nextInt(0, params.cardinality)));
89+
}
90+
w.addDocument(doc);
91+
}
92+
w.forceMerge(1, true);
93+
reader = DirectoryReader.open(w);
94+
searcher = new IndexSearcher(reader);
95+
w.close();
96+
}
97+
98+
@TearDown(Level.Trial)
99+
public void tearDown() throws Exception {
100+
reader.close();
101+
if (dir != null) {
102+
dir.close();
103+
dir = null;
104+
}
105+
if (Files.exists(path)) {
106+
try (Stream<Path> walk = Files.walk(path)) {
107+
walk.sorted(Comparator.reverseOrder())
108+
.forEach(
109+
p -> {
110+
try {
111+
Files.delete(p);
112+
} catch (IOException _) {
113+
// ignore
114+
}
115+
});
116+
}
117+
}
118+
}
119+
120+
@State(Scope.Benchmark)
121+
public static class BenchmarkParams {
122+
@Param({"100000", "1000000"})
123+
public int docCount;
124+
125+
@Param({"100", "10000"})
126+
public int cardinality;
127+
128+
@Param({"false", "true"})
129+
public boolean multiValued;
130+
}
131+
132+
/** Facet count over all documents. */
133+
@Benchmark
134+
public CountFacetRecorder matchAll(BenchmarkParams params) throws IOException {
135+
LongValueFacetCutter cutter = new LongValueFacetCutter("f");
136+
CountFacetRecorder recorder = new CountFacetRecorder();
137+
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
138+
new FacetFieldCollectorManager<>(cutter, recorder);
139+
searcher.search(MatchAllDocsQuery.INSTANCE, collectorManager);
140+
return recorder;
141+
}
142+
143+
/** Facet count over ~10% of documents filtered by a point range query. */
144+
@Benchmark
145+
public CountFacetRecorder filteredRange(BenchmarkParams params) throws IOException {
146+
long lower = params.docCount / 4;
147+
long upper = lower + params.docCount / 10;
148+
LongValueFacetCutter cutter = new LongValueFacetCutter("f");
149+
CountFacetRecorder recorder = new CountFacetRecorder();
150+
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
151+
new FacetFieldCollectorManager<>(cutter, recorder);
152+
searcher.search(LongPoint.newRangeQuery("id", lower, upper), collectorManager);
153+
return recorder;
154+
}
155+
}

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/LongValueFacetCutter.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ public LongValueFacetCutter(String field) {
6161
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
6262
SortedNumericDocValues docValues = DocValues.getSortedNumeric(context.reader(), field);
6363
return new LeafFacetCutter() {
64+
final LongIntHashMap localCache = new LongIntHashMap();
6465
int docValueCount;
6566
long lastDocValue;
6667
int docValueCursor;
@@ -83,7 +84,13 @@ public int nextOrd() throws IOException {
8384
// check previous value to remove duplicates
8485
if (docValueCursor == 1 || value != lastDocValue) {
8586
lastDocValue = value;
86-
return valueToOrdMap.computeIfAbsent(value, maxOrdinal::incrementAndGet);
87+
int ord = localCache.getOrDefault(value, -1);
88+
if (ord != -1) {
89+
return ord;
90+
}
91+
ord = valueToOrdMap.computeIfAbsent(value, maxOrdinal::incrementAndGet);
92+
localCache.put(value, ord);
93+
return ord;
8794
}
8895
}
8996
return NO_MORE_ORDS;

0 commit comments

Comments
 (0)