Skip to content

Commit 67150f1

Browse files
committed
Make FSTPostingFormat build the FST off-heap
1 parent da69346 commit 67150f1

2 files changed

Lines changed: 80 additions & 42 deletions

File tree

lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -65,29 +65,28 @@
6565
public class FSTTermsReader extends FieldsProducer {
6666
private final TreeMap<String, TermsReader> fields = new TreeMap<>();
6767
private final PostingsReaderBase postingsReader;
68+
private final IndexInput fstMetaInput;
6869
private final IndexInput fstTermsInput;
6970

7071
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
7172
throws IOException {
72-
final String termsFileName =
73+
final String termsMetaFileName =
7374
IndexFileNames.segmentFileName(
74-
state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
75+
state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_META_EXTENSION);
76+
final String termsDataFileName =
77+
IndexFileNames.segmentFileName(
78+
state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_DATA_EXTENSION);
7579

7680
this.postingsReader = postingsReader;
77-
this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.LOAD);
81+
this.fstMetaInput = state.directory.openInput(termsMetaFileName, IOContext.LOAD);
82+
this.fstTermsInput = state.directory.openInput(termsDataFileName, IOContext.LOAD);
7883

79-
IndexInput in = this.fstTermsInput;
84+
IndexInput in = this.fstMetaInput;
8085

8186
boolean success = false;
8287
try {
83-
CodecUtil.checkIndexHeader(
84-
in,
85-
FSTTermsWriter.TERMS_CODEC_NAME,
86-
FSTTermsWriter.TERMS_VERSION_START,
87-
FSTTermsWriter.TERMS_VERSION_CURRENT,
88-
state.segmentInfo.getId(),
89-
state.segmentSuffix);
90-
CodecUtil.checksumEntireFile(in);
88+
verifyInput(state, in);
89+
verifyInput(state, fstTermsInput);
9190
this.postingsReader.init(in, state);
9291
seekDir(in);
9392

@@ -102,19 +101,32 @@ public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
102101
long sumDocFreq =
103102
fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
104103
int docCount = in.readVInt();
104+
long startFP = in.readVLong();
105+
fstTermsInput.seek(startFP);
105106
TermsReader current =
106-
new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
107+
new TermsReader(fieldInfo, in, fstTermsInput, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
107108
TermsReader previous = fields.put(fieldInfo.name, current);
108109
checkFieldSummary(state.segmentInfo, in, current, previous);
109110
}
110111
success = true;
111112
} finally {
112113
if (success == false) {
113-
IOUtils.closeWhileHandlingException(in);
114+
IOUtils.closeWhileHandlingException(in, fstTermsInput);
114115
}
115116
}
116117
}
117118

119+
private static void verifyInput(SegmentReadState state, IndexInput in) throws IOException {
120+
CodecUtil.checkIndexHeader(
121+
in,
122+
FSTTermsWriter.TERMS_CODEC_NAME,
123+
FSTTermsWriter.TERMS_VERSION_START,
124+
FSTTermsWriter.TERMS_VERSION_CURRENT,
125+
state.segmentInfo.getId(),
126+
state.segmentSuffix);
127+
CodecUtil.checksumEntireFile(in);
128+
}
129+
118130
private void seekDir(IndexInput in) throws IOException {
119131
in.seek(in.length() - CodecUtil.footerLength() - 8);
120132
in.seek(in.readLong());
@@ -165,7 +177,7 @@ public int size() {
165177
@Override
166178
public void close() throws IOException {
167179
try {
168-
IOUtils.close(postingsReader, fstTermsInput);
180+
IOUtils.close(postingsReader, fstMetaInput, fstTermsInput);
169181
} finally {
170182
fields.clear();
171183
}
@@ -182,7 +194,8 @@ final class TermsReader extends Terms {
182194

183195
TermsReader(
184196
FieldInfo fieldInfo,
185-
IndexInput in,
197+
IndexInput metaIn,
198+
IndexInput dataIn,
186199
long numTerms,
187200
long sumTotalTermFreq,
188201
long sumDocFreq,
@@ -195,8 +208,8 @@ final class TermsReader extends Terms {
195208
this.docCount = docCount;
196209
OffHeapFSTStore offHeapFSTStore = new OffHeapFSTStore();
197210
FSTTermOutputs outputs = new FSTTermOutputs(fieldInfo);
198-
this.dict = new FST<>(FST.readMetadata(in, outputs), in, offHeapFSTStore);
199-
in.skipBytes(offHeapFSTStore.size());
211+
this.dict = new FST<>(FST.readMetadata(metaIn, outputs), dataIn, offHeapFSTStore);
212+
dataIn.skipBytes(offHeapFSTStore.size());
200213
}
201214

202215
@Override
@@ -508,7 +521,11 @@ void decodeMetaData() throws IOException {
508521
if (meta.bytes != null) {
509522
bytesReader.reset(meta.bytes, 0, meta.bytes.length);
510523
}
511-
postingsReader.decodeTerm(bytesReader, fieldInfo, state, true);
524+
try {
525+
postingsReader.decodeTerm(bytesReader, fieldInfo, state, true);
526+
} catch (Exception ex) {
527+
System.out.println("bingo");
528+
}
512529
decoded = true;
513530
}
514531
}

lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -107,42 +107,55 @@
107107
* @lucene.experimental
108108
*/
109109
public class FSTTermsWriter extends FieldsConsumer {
110-
static final String TERMS_EXTENSION = "tfp";
110+
static final String TERMS_META_EXTENSION = "tfp.meta";
111+
static final String TERMS_DATA_EXTENSION = "tfp.data";
111112
static final String TERMS_CODEC_NAME = "FSTTerms";
112113
public static final int TERMS_VERSION_START = 2;
113114
public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
114115

115116
final PostingsWriterBase postingsWriter;
116117
final FieldInfos fieldInfos;
117-
IndexOutput out;
118+
IndexOutput dataOut;
119+
IndexOutput metaOut;
118120
final int maxDoc;
119121
final List<FieldMetaData> fields = new ArrayList<>();
120122

121123
/**
 * Creates the writer and opens its two per-segment outputs: a metadata file
 * ({@code TERMS_META_EXTENSION}) and a data file ({@code TERMS_DATA_EXTENSION}).
 *
 * <p>An index header carrying the codec name, current version, segment id and segment suffix is
 * written to both files, and the postings writer is then initialised on the metadata output.
 * If any step fails, whichever outputs were already created are closed before the exception
 * propagates.
 *
 * @param state write state of the segment being flushed; supplies the directory, IO context,
 *     segment name/suffix/id, field infos and max doc
 * @param postingsWriter postings writer that this terms writer drives
 * @throws IOException if an output cannot be created or a header cannot be written
 */
public FSTTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter)
    throws IOException {
  final String termsMetaFileName =
      IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, TERMS_META_EXTENSION);
  final String termsDataFileName =
      IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, TERMS_DATA_EXTENSION);

  this.postingsWriter = postingsWriter;
  this.fieldInfos = state.fieldInfos;
  this.maxDoc = state.segmentInfo.maxDoc();

  boolean success = false;
  try {
    // Both outputs are created inside the guarded region: if creating the data output fails,
    // the already-open metadata output is still closed by the finally block below. A field
    // that was never assigned is still null there, which closeWhileHandlingException skips.
    this.metaOut = state.directory.createOutput(termsMetaFileName, state.context);
    this.dataOut = state.directory.createOutput(termsDataFileName, state.context);

    CodecUtil.writeIndexHeader(
        metaOut,
        TERMS_CODEC_NAME,
        TERMS_VERSION_CURRENT,
        state.segmentInfo.getId(),
        state.segmentSuffix);

    CodecUtil.writeIndexHeader(
        dataOut,
        TERMS_CODEC_NAME,
        TERMS_VERSION_CURRENT,
        state.segmentInfo.getId(),
        state.segmentSuffix);

    this.postingsWriter.init(metaOut, state);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(metaOut, dataOut);
    }
  }
}
@@ -187,33 +200,38 @@ public void write(Fields fields, NormsProducer norms) throws IOException {
187200

188201
/**
 * Finishes the terms files and releases the outputs.
 *
 * <p>This block is shown as a rendered diff: a line that follows an {@code N-} gutter marker is
 * the pre-change version (single {@code out} stream), a line that follows an {@code N+} marker
 * is the post-change version (separate {@code metaOut} / {@code dataOut} streams).
 *
 * <p>Post-change behaviour: if {@code metaOut} is still set, the per-field summary is appended
 * to {@code metaOut}, followed by the trailer and a codec footer; a codec footer is also written
 * to {@code dataOut}. Both outputs and {@code postingsWriter} are then closed, with exceptions
 * suppressed if the summary could not be written, and both output fields are set to
 * {@code null} so a later call does nothing.
 *
 * @throws IOException if writing the summary or closing the outputs fails
 */
@Override
189202
public void close() throws IOException {
190-
if (out != null) {
203+
if (metaOut != null) {
204+
assert dataOut != null;
191205
boolean success = false;
192206
try {
193207
// write field summary
// dirStart is where the field summary begins in the metadata file; it is handed to
// writeTrailer below (presumably so the reader can seek back to it — confirm in writeTrailer).
194-
final long dirStart = out.getFilePointer();
208+
final long dirStart = metaOut.getFilePointer();
195209

196-
out.writeVInt(fields.size());
210+
metaOut.writeVInt(fields.size());
197211
for (FieldMetaData field : fields) {
198-
out.writeVInt(field.fieldInfo.number);
199-
out.writeVLong(field.numTerms);
212+
metaOut.writeVInt(field.fieldInfo.number);
213+
metaOut.writeVLong(field.numTerms);
// sumTotalTermFreq is only stored for fields that index more than DOCS.
200214
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
201-
out.writeVLong(field.sumTotalTermFreq);
215+
metaOut.writeVLong(field.sumTotalTermFreq);
202216
}
203-
out.writeVLong(field.sumDocFreq);
204-
out.writeVInt(field.docCount);
205-
field.dict.save(out, out);
217+
metaOut.writeVLong(field.sumDocFreq);
218+
metaOut.writeVInt(field.docCount);
219+
// write the starting file pointer
// NOTE(review): nothing inside this loop writes to dataOut, so dataOut.getFilePointer()
// returns the same value for every field. Subtracting this field's numBytes() can only give
// the field's own start offset if its FST was the last thing written to dataOut —
// presumably wrong for every field but the last when a segment has several fields. Confirm,
// and consider recording the start pointer at the time each FST is compiled.
220+
metaOut.writeVLong(dataOut.getFilePointer() - field.dict.numBytes());
// NOTE(review): TermsWriter.finish() also calls fst.saveMetadata(metaOut), so the FST
// metadata appears to be written to metaOut twice — confirm which copy the reader consumes.
221+
field.dict.saveMetadata(metaOut);
206222
}
207-
writeTrailer(out, dirStart);
208-
CodecUtil.writeFooter(out);
223+
writeTrailer(metaOut, dirStart);
224+
CodecUtil.writeFooter(metaOut);
225+
CodecUtil.writeFooter(dataOut);
209226
success = true;
210227
} finally {
// On success close normally so close failures surface; on failure suppress close errors so
// the original exception is the one that propagates.
211228
if (success) {
212-
IOUtils.close(out, postingsWriter);
229+
IOUtils.close(metaOut, dataOut, postingsWriter);
213230
} else {
214-
IOUtils.closeWhileHandlingException(out, postingsWriter);
231+
IOUtils.closeWhileHandlingException(metaOut, dataOut, postingsWriter);
215232
}
// Clearing the fields makes the null guard at the top skip all work on a repeated close().
216-
out = null;
233+
metaOut = null;
234+
dataOut = null;
217235
}
218236
}
219237
}
@@ -256,7 +274,9 @@ final class TermsWriter {
256274
this.fieldInfo = fieldInfo;
257275
postingsWriter.setField(fieldInfo);
258276
this.outputs = new FSTTermOutputs(fieldInfo);
259-
this.fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
277+
this.fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
278+
.dataOutput(dataOut)
279+
.build();
260280
}
261281

262282
public void finishTerm(BytesRef text, BlockTermState state) throws IOException {
@@ -278,6 +298,7 @@ public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws
278298
// save FST dict
279299
if (numTerms > 0) {
280300
final FST<FSTTermOutputs.TermData> fst = fstCompiler.compile();
301+
fst.saveMetadata(metaOut);
281302
fields.add(
282303
new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, fst));
283304
}

0 commit comments

Comments
 (0)