mirror of https://github.com/apache/lucene.git
LUCENE-9353: Move terms metadata to its own file. (#1473)
parent c083e5414e
commit 87a3bef50f

@@ -200,6 +200,10 @@ Improvements
 * LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsCollector's numHits
   (Tomás Fernández Löbbe)
 
+* LUCENE-9353: Metadata of the terms dictionary moved to its own file, with the
+  `.tmd` extension. This allows checksums of metadata to be verified when
+  opening indices and helps save seeks when opening an index. (Adrien Grand)
+
 * LUCENE-9359: SegmentInfos#readCommit now always returns a
   CorruptIndexException if the content of the file is invalid. (Adrien Grand)
 
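The new `.tmd` file described above is read through a checksumming input when a segment is opened, so the footer of the metadata can be verified cheaply before any per-field statistics are trusted. The following is only a minimal sketch of that pattern, reusing calls that appear in the diff below (openChecksumInput, checkIndexHeader, checkFooter); the class name TermsMetaSketch and the elided per-field decoding are illustrative placeholders, not part of this commit.

import java.io.IOException;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.ChecksumIndexInput;

final class TermsMetaSketch {
  static final String TERMS_META_EXTENSION = "tmd";
  static final String TERMS_META_CODEC_NAME = "BlockTreeTermsMeta";

  // Opens a hypothetical ".tmd" file of a segment, checks its header and verifies
  // the checksum footer, reporting corruption ahead of any secondary decode error.
  static void readMeta(SegmentReadState state, int minVersion, int maxVersion) throws IOException {
    String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_META_EXTENSION);
    try (ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaName, state.context)) {
      Throwable priorE = null;
      try {
        CodecUtil.checkIndexHeader(metaIn, TERMS_META_CODEC_NAME, minVersion, maxVersion,
            state.segmentInfo.getId(), state.segmentSuffix);
        // ... decode per-field metadata from metaIn here (omitted) ...
      } catch (Throwable exception) {
        priorE = exception;
      } finally {
        // Any decode error is handed to checkFooter so a checksum mismatch wins over it.
        CodecUtil.checkFooter(metaIn, priorE);
      }
    }
  }
}

This is the same error-ordering the BlockTreeTermsReader changes below use: decode failures are captured and passed to checkFooter, so corruption is reported first.
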
@@ -21,6 +21,7 @@ import java.io.BufferedInputStream;
 import java.io.InputStream;
 import java.io.IOException;
 
+import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;

@@ -44,7 +45,8 @@ public final class TokenInfoDictionary extends BinaryDictionary {
     super(resourceScheme, resourcePath);
     FST<Long> fst;
     try (InputStream is = new BufferedInputStream(getResource(FST_FILENAME_SUFFIX))) {
-      fst = new FST<>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton());
+      DataInput in = new InputStreamDataInput(is);
+      fst = new FST<>(in, in, PositiveIntOutputs.getSingleton());
     }
     // TODO: some way to configure?
     this.fst = new TokenInfoFST(fst, true);

@@ -20,6 +20,7 @@ import java.io.BufferedInputStream;
 import java.io.InputStream;
 import java.io.IOException;
 
+import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;

@@ -47,7 +48,8 @@ public final class TokenInfoDictionary extends BinaryDictionary {
     super(resourceScheme, resourcePath);
     FST<Long> fst;
     try (InputStream is = new BufferedInputStream(getResource(FST_FILENAME_SUFFIX))) {
-      fst = new FST<>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton());
+      DataInput in = new InputStreamDataInput(is);
+      fst = new FST<>(in, in, PositiveIntOutputs.getSingleton());
     }
     this.fst = new TokenInfoFST(fst);
   }

@@ -148,7 +148,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
    public FieldIndexData(IndexInput in, FieldInfo fieldInfo, long indexStart) throws IOException {
      IndexInput clone = in.clone();
      clone.seek(indexStart);
-     fst = new FST<>(clone, fstOutputs);
+     fst = new FST<>(clone, clone, fstOutputs);
      clone.close();
 
      /*

@@ -280,7 +280,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
    public void finish(long termsFilePointer) throws IOException {
      fst = fstCompiler.compile();
      if (fst != null) {
-       fst.save(out);
+       fst.save(out, out);
      }
    }
  }

@@ -832,7 +832,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
 
        // Write FST to index
        indexStartFP = indexOut.getFilePointer();
-       root.index.save(indexOut);
+       root.index.save(indexOut, indexOut);
        //System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name);
 
        // if (SAVE_DOT_FILES || DEBUG) {

@@ -78,7 +78,7 @@ final class OrdsFieldReader extends Terms implements Accountable {
      final IndexInput clone = indexIn.clone();
      //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
      clone.seek(indexStartFP);
-     index = new FST<>(clone, OrdsBlockTreeTermsWriter.FST_OUTPUTS);
+     index = new FST<>(clone, clone, OrdsBlockTreeTermsWriter.FST_OUTPUTS);
 
      /*
      if (true) {

@@ -176,7 +176,7 @@ public class FSTTermsReader extends FieldsProducer {
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
-     this.dict = new FST<>(in, new FSTTermOutputs(fieldInfo));
+     this.dict = new FST<>(in, in, new FSTTermOutputs(fieldInfo));
    }
 
    @Override

@@ -209,7 +209,7 @@ public class FSTTermsWriter extends FieldsConsumer {
        }
        out.writeVLong(field.sumDocFreq);
        out.writeVInt(field.docCount);
-       field.dict.save(out);
+       field.dict.save(out, out);
      }
      writeTrailer(out, dirStart);
      CodecUtil.writeFooter(out);

@@ -71,10 +71,10 @@ public class FSTDictionary implements IndexDictionary {
  @Override
  public void write(DataOutput output, BlockEncoder blockEncoder) throws IOException {
    if (blockEncoder == null) {
-     fst.save(output);
+     fst.save(output, output);
    } else {
      ByteBuffersDataOutput bytesDataOutput = ByteBuffersDataOutput.newResettableInstance();
-     fst.save(bytesDataOutput);
+     fst.save(bytesDataOutput, bytesDataOutput);
      BlockEncoder.WritableBytes encodedBytes = blockEncoder.encode(bytesDataOutput.toDataInput(), bytesDataOutput.size());
      output.writeVLong(encodedBytes.size());
      encodedBytes.writeTo(output);

@@ -98,8 +98,8 @@ public class FSTDictionary implements IndexDictionary {
      isFSTOnHeap = true;
    }
    PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
-   FST<Long> fst = isFSTOnHeap ? new FST<>(fstDataInput, fstOutputs)
-       : new FST<>(fstDataInput, fstOutputs, new OffHeapFSTStore());
+   FST<Long> fst = isFSTOnHeap ? new FST<>(fstDataInput, fstDataInput, fstOutputs)
+       : new FST<>(fstDataInput, fstDataInput, fstOutputs, new OffHeapFSTStore());
    return new FSTDictionary(fst);
  }
 
@@ -16,7 +16,6 @@
  */
 package org.apache.lucene.codecs.blocktree;
 
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;

@@ -35,6 +34,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Accountables;

@@ -97,13 +97,20 @@ public final class BlockTreeTermsReader extends FieldsProducer {
  /** Suffixes are compressed to save space. */
  public static final int VERSION_COMPRESSED_SUFFIXES = 5;
 
+ /** Metadata is written to its own file. */
+ public static final int VERSION_META_FILE = 6;
+
  /** Current terms format. */
- public static final int VERSION_CURRENT = VERSION_COMPRESSED_SUFFIXES;
+ public static final int VERSION_CURRENT = VERSION_META_FILE;
 
  /** Extension of terms index file */
  static final String TERMS_INDEX_EXTENSION = "tip";
  final static String TERMS_INDEX_CODEC_NAME = "BlockTreeTermsIndex";
 
+ /** Extension of terms meta file */
+ static final String TERMS_META_EXTENSION = "tmd";
+ final static String TERMS_META_CODEC_NAME = "BlockTreeTermsMeta";
+
  // Open input to the main terms dict file (_X.tib)
  final IndexInput termsIn;
  // Open input to the terms index file (_X.tip)

@@ -128,9 +135,9 @@ public final class BlockTreeTermsReader extends FieldsProducer {
 
    this.postingsReader = postingsReader;
    this.segment = state.segmentInfo.name;
 
-   String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
    try {
+     String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
      termsIn = state.directory.openInput(termsName, state.context);
      version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
 
@@ -138,66 +145,106 @@ public final class BlockTreeTermsReader extends FieldsProducer {
      indexIn = state.directory.openInput(indexName, state.context);
      CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix);
 
-     // Have PostingsReader init itself
-     postingsReader.init(termsIn, state);
+     if (version < VERSION_META_FILE) {
+       // Have PostingsReader init itself
+       postingsReader.init(termsIn, state);
 
-     // Verifying the checksum against all bytes would be too costly, but for now we at least
-     // verify proper structure of the checksum footer. This is cheap and can detect some forms
-     // of corruption such as file truncation.
-     CodecUtil.retrieveChecksum(indexIn);
-     CodecUtil.retrieveChecksum(termsIn);
+       // Verifying the checksum against all bytes would be too costly, but for now we at least
+       // verify proper structure of the checksum footer. This is cheap and can detect some forms
+       // of corruption such as file truncation.
+       CodecUtil.retrieveChecksum(indexIn);
+       CodecUtil.retrieveChecksum(termsIn);
+     }
 
      // Read per-field details
-     seekDir(termsIn);
-     seekDir(indexIn);
-
-     final int numFields = termsIn.readVInt();
-     if (numFields < 0) {
-       throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
-     }
-     fieldMap = new HashMap<>((int) (numFields / 0.75f) + 1);
-     for (int i = 0; i < numFields; ++i) {
-       final int field = termsIn.readVInt();
-       final long numTerms = termsIn.readVLong();
-       if (numTerms <= 0) {
-         throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
-       }
-       final BytesRef rootCode = readBytesRef(termsIn);
-       final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
-       if (fieldInfo == null) {
-         throw new CorruptIndexException("invalid field number: " + field, termsIn);
-       }
-       final long sumTotalTermFreq = termsIn.readVLong();
-       // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
-       final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsIn.readVLong();
-       final int docCount = termsIn.readVInt();
-       if (version < VERSION_META_LONGS_REMOVED) {
-         final int longsSize = termsIn.readVInt();
-         if (longsSize < 0) {
-           throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
-         }
-       }
-       BytesRef minTerm = readBytesRef(termsIn);
-       BytesRef maxTerm = readBytesRef(termsIn);
-       if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
-         throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), termsIn);
-       }
-       if (sumDocFreq < docCount) { // #postings must be >= #docs with field
-         throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
-       }
-       if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
-         throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn);
-       }
-       final long indexStartFP = indexIn.readVLong();
-       FieldReader previous = fieldMap.put(fieldInfo.name,
-           new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
-               indexStartFP, indexIn, minTerm, maxTerm));
-       if (previous != null) {
-         throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
-       }
-     }
+     String metaName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_META_EXTENSION);
+     Map<String, FieldReader> fieldMap = null;
+     Throwable priorE = null;
+     long indexLength = -1, termsLength = -1;
+     try (ChecksumIndexInput metaIn = version >= VERSION_META_FILE ? state.directory.openChecksumInput(metaName, state.context) : null) {
+       try {
+         final IndexInput indexMetaIn, termsMetaIn;
+         if (version >= VERSION_META_FILE) {
+           CodecUtil.checkIndexHeader(metaIn, TERMS_META_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix);
+           indexMetaIn = termsMetaIn = metaIn;
+           postingsReader.init(metaIn, state);
+         } else {
+           seekDir(termsIn);
+           seekDir(indexIn);
+           indexMetaIn = indexIn;
+           termsMetaIn = termsIn;
+         }
+
+         final int numFields = termsMetaIn.readVInt();
+         if (numFields < 0) {
+           throw new CorruptIndexException("invalid numFields: " + numFields, termsMetaIn);
+         }
+         fieldMap = new HashMap<>((int) (numFields / 0.75f) + 1);
+         for (int i = 0; i < numFields; ++i) {
+           final int field = termsMetaIn.readVInt();
+           final long numTerms = termsMetaIn.readVLong();
+           if (numTerms <= 0) {
+             throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsMetaIn);
+           }
+           final BytesRef rootCode = readBytesRef(termsMetaIn);
+           final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
+           if (fieldInfo == null) {
+             throw new CorruptIndexException("invalid field number: " + field, termsMetaIn);
+           }
+           final long sumTotalTermFreq = termsMetaIn.readVLong();
+           // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
+           final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsMetaIn.readVLong();
+           final int docCount = termsMetaIn.readVInt();
+           if (version < VERSION_META_LONGS_REMOVED) {
+             final int longsSize = termsMetaIn.readVInt();
+             if (longsSize < 0) {
+               throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsMetaIn);
+             }
+           }
+           BytesRef minTerm = readBytesRef(termsMetaIn);
+           BytesRef maxTerm = readBytesRef(termsMetaIn);
+           if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
+             throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), termsMetaIn);
+           }
+           if (sumDocFreq < docCount) { // #postings must be >= #docs with field
+             throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsMetaIn);
+           }
+           if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+             throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsMetaIn);
+           }
+           final long indexStartFP = indexMetaIn.readVLong();
+           FieldReader previous = fieldMap.put(fieldInfo.name,
+               new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
+                   indexStartFP, indexMetaIn, indexIn, minTerm, maxTerm));
+           if (previous != null) {
+             throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsMetaIn);
+           }
+         }
+         if (version >= VERSION_META_FILE) {
+           indexLength = metaIn.readLong();
+           termsLength = metaIn.readLong();
+         }
+       } catch (Throwable exception) {
+         priorE = exception;
+       } finally {
+         if (metaIn != null) {
+           CodecUtil.checkFooter(metaIn, priorE);
+         } else if (priorE != null) {
+           IOUtils.rethrowAlways(priorE);
+         }
+       }
+     }
+     if (version >= VERSION_META_FILE) {
+       // At this point the checksum of the meta file has been verified so the lengths are likely correct
+       CodecUtil.retrieveChecksum(indexIn, indexLength);
+       CodecUtil.retrieveChecksum(termsIn, termsLength);
+     } else {
+       assert indexLength == -1 : indexLength;
+       assert termsLength == -1 : termsLength;
+     }
 
      List<String> fieldList = new ArrayList<>(fieldMap.keySet());
      fieldList.sort(null);
      this.fieldMap = fieldMap;
      this.fieldList = Collections.unmodifiableList(fieldList);
      success = true;
    } finally {

@@ -211,6 +211,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
 
  //private final static boolean SAVE_DOT_FILES = false;
 
+ private final IndexOutput metaOut;
  private final IndexOutput termsOut;
  private final IndexOutput indexOut;
  final int maxDoc;

@@ -220,34 +221,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
  final PostingsWriterBase postingsWriter;
  final FieldInfos fieldInfos;
 
- private static class FieldMetaData {
-   public final FieldInfo fieldInfo;
-   public final BytesRef rootCode;
-   public final long numTerms;
-   public final long indexStartFP;
-   public final long sumTotalTermFreq;
-   public final long sumDocFreq;
-   public final int docCount;
-   public final BytesRef minTerm;
-   public final BytesRef maxTerm;
-
-   public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount,
-                        BytesRef minTerm, BytesRef maxTerm) {
-     assert numTerms > 0;
-     this.fieldInfo = fieldInfo;
-     assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;
-     this.rootCode = rootCode;
-     this.indexStartFP = indexStartFP;
-     this.numTerms = numTerms;
-     this.sumTotalTermFreq = sumTotalTermFreq;
-     this.sumDocFreq = sumDocFreq;
-     this.docCount = docCount;
-     this.minTerm = minTerm;
-     this.maxTerm = maxTerm;
-   }
- }
-
- private final List<FieldMetaData> fields = new ArrayList<>();
+ private final List<ByteBuffersDataOutput> fields = new ArrayList<>();
 
  /** Create a new writer. The number of items (terms or
  *  sub-blocks) per block will aim to be between

@@ -272,7 +246,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
    final String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_EXTENSION);
    termsOut = state.directory.createOutput(termsName, state.context);
    boolean success = false;
-   IndexOutput indexOut = null;
+   IndexOutput metaOut = null, indexOut = null;
    try {
      CodecUtil.writeIndexHeader(termsOut, BlockTreeTermsReader.TERMS_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
                                 state.segmentInfo.getId(), state.segmentSuffix);

@@ -283,27 +257,23 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
                                 state.segmentInfo.getId(), state.segmentSuffix);
      //segment = state.segmentInfo.name;
 
-     postingsWriter.init(termsOut, state); // have consumer write its format/header
-
+     final String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_META_EXTENSION);
+     metaOut = state.directory.createOutput(metaName, state.context);
+     CodecUtil.writeIndexHeader(metaOut, BlockTreeTermsReader.TERMS_META_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
+                                state.segmentInfo.getId(), state.segmentSuffix);
+
+     postingsWriter.init(metaOut, state); // have consumer write its format/header
+
+     this.metaOut = metaOut;
      this.indexOut = indexOut;
      success = true;
    } finally {
      if (!success) {
-       IOUtils.closeWhileHandlingException(termsOut, indexOut);
+       IOUtils.closeWhileHandlingException(metaOut, termsOut, indexOut);
      }
    }
  }
 
- /** Writes the terms file trailer. */
- private void writeTrailer(IndexOutput out, long dirStart) throws IOException {
-   out.writeLong(dirStart);
- }
-
- /** Writes the index file trailer. */
- private void writeIndexTrailer(IndexOutput indexOut, long dirStart) throws IOException {
-   indexOut.writeLong(dirStart);
- }
-
  /** Throws {@code IllegalArgumentException} if any of these settings
  *  is invalid. */
  public static void validateSettings(int minItemsInBlock, int maxItemsInBlock) {

@@ -548,7 +518,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
    final FixedBitSet docsSeen;
    long sumTotalTermFreq;
    long sumDocFreq;
-   long indexStartFP;
 
    // Records index into pending where the current prefix at that
    // length "started"; for example, if current term starts with 't',

@@ -1006,11 +975,27 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
        assert pending.size() == 1 && !pending.get(0).isTerm: "pending.size()=" + pending.size() + " pending=" + pending;
        final PendingBlock root = (PendingBlock) pending.get(0);
        assert root.prefix.length == 0;
-       assert root.index.getEmptyOutput() != null;
+       final BytesRef rootCode = root.index.getEmptyOutput();
+       assert rootCode != null;
+
+       ByteBuffersDataOutput metaOut = new ByteBuffersDataOutput();
+       fields.add(metaOut);
+
+       metaOut.writeVInt(fieldInfo.number);
+       metaOut.writeVLong(numTerms);
+       metaOut.writeVInt(rootCode.length);
+       metaOut.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length);
+       assert fieldInfo.getIndexOptions() != IndexOptions.NONE;
+       if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
+         metaOut.writeVLong(sumTotalTermFreq);
+       }
+       metaOut.writeVLong(sumDocFreq);
+       metaOut.writeVInt(docsSeen.cardinality());
+       writeBytesRef(metaOut, new BytesRef(firstPendingTerm.termBytes));
+       writeBytesRef(metaOut, new BytesRef(lastPendingTerm.termBytes));
+       metaOut.writeVLong(indexOut.getFilePointer());
 
        // Write FST to index
-       indexStartFP = indexOut.getFilePointer();
-       root.index.save(indexOut);
+       root.index.save(metaOut, indexOut);
        //System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name);
 
        /*

@@ -1022,20 +1007,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
          w.close();
        }
        */
-       assert firstPendingTerm != null;
-       BytesRef minTerm = new BytesRef(firstPendingTerm.termBytes);
-
-       assert lastPendingTerm != null;
-       BytesRef maxTerm = new BytesRef(lastPendingTerm.termBytes);
 
-       fields.add(new FieldMetaData(fieldInfo,
-                                    ((PendingBlock) pending.get(0)).index.getEmptyOutput(),
-                                    numTerms,
-                                    indexStartFP,
-                                    sumTotalTermFreq,
-                                    sumDocFreq,
-                                    docsSeen.cardinality(),
-                                    minTerm, maxTerm));
      } else {
        assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1;
        assert sumDocFreq == 0;

@@ -1060,47 +1032,29 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
      return;
    }
    closed = true;
 
    boolean success = false;
    try {
-
-     final long dirStart = termsOut.getFilePointer();
-     final long indexDirStart = indexOut.getFilePointer();
-
-     termsOut.writeVInt(fields.size());
-
-     for(FieldMetaData field : fields) {
-       //System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms");
-       termsOut.writeVInt(field.fieldInfo.number);
-       assert field.numTerms > 0;
-       termsOut.writeVLong(field.numTerms);
-       termsOut.writeVInt(field.rootCode.length);
-       termsOut.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
-       assert field.fieldInfo.getIndexOptions() != IndexOptions.NONE;
-       if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
-         termsOut.writeVLong(field.sumTotalTermFreq);
-       }
-       termsOut.writeVLong(field.sumDocFreq);
-       termsOut.writeVInt(field.docCount);
-       indexOut.writeVLong(field.indexStartFP);
-       writeBytesRef(termsOut, field.minTerm);
-       writeBytesRef(termsOut, field.maxTerm);
+     metaOut.writeVInt(fields.size());
+     for (ByteBuffersDataOutput fieldMeta : fields) {
+       fieldMeta.copyTo(metaOut);
      }
-     writeTrailer(termsOut, dirStart);
-     CodecUtil.writeFooter(termsOut);
-     writeIndexTrailer(indexOut, indexDirStart);
      CodecUtil.writeFooter(indexOut);
+     metaOut.writeLong(indexOut.getFilePointer());
+     CodecUtil.writeFooter(termsOut);
+     metaOut.writeLong(termsOut.getFilePointer());
+     CodecUtil.writeFooter(metaOut);
      success = true;
    } finally {
      if (success) {
-       IOUtils.close(termsOut, indexOut, postingsWriter);
+       IOUtils.close(metaOut, termsOut, indexOut, postingsWriter);
      } else {
-       IOUtils.closeWhileHandlingException(termsOut, indexOut, postingsWriter);
+       IOUtils.closeWhileHandlingException(metaOut, termsOut, indexOut, postingsWriter);
      }
    }
  }
 
- private static void writeBytesRef(IndexOutput out, BytesRef bytes) throws IOException {
+ private static void writeBytesRef(DataOutput out, BytesRef bytes) throws IOException {
    out.writeVInt(bytes.length);
    out.writeBytes(bytes.bytes, bytes.offset, bytes.length);
  }

@@ -52,7 +52,6 @@ public final class FieldReader extends Terms implements Accountable {
  final long sumTotalTermFreq;
  final long sumDocFreq;
  final int docCount;
- final long indexStartFP;
  final long rootBlockFP;
  final BytesRef rootCode;
  final BytesRef minTerm;

@@ -63,7 +62,7 @@ public final class FieldReader extends Terms implements Accountable {
  //private boolean DEBUG;
 
  FieldReader(BlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
-             long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
+             long indexStartFP, IndexInput metaIn, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
    assert numTerms > 0;
    this.fieldInfo = fieldInfo;
    //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");

@@ -72,7 +71,6 @@ public final class FieldReader extends Terms implements Accountable {
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.sumDocFreq = sumDocFreq;
    this.docCount = docCount;
-   this.indexStartFP = indexStartFP;
    this.rootCode = rootCode;
    this.minTerm = minTerm;
    this.maxTerm = maxTerm;

@@ -81,22 +79,22 @@ public final class FieldReader extends Terms implements Accountable {
    // }
    rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
    // Initialize FST always off-heap.
-   if (indexIn != null) {
-     final IndexInput clone = indexIn.clone();
-     clone.seek(indexStartFP);
-     index = new FST<>(clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
-     /*
-      if (false) {
-      final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
-      Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
-      Util.toDot(index, w, false, false);
-      System.out.println("FST INDEX: SAVED to " + dotFileName);
-      w.close();
-      }
-     */
+   final IndexInput clone = indexIn.clone();
+   clone.seek(indexStartFP);
+   if (metaIn == indexIn) { // Only true before Lucene 8.6
+     index = new FST<>(clone, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
    } else {
-     index = null;
+     index = new FST<>(metaIn, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
    }
+   /*
+    if (false) {
+    final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
+    Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
+    Util.toDot(index, w, false, false);
+    System.out.println("FST INDEX: SAVED to " + dotFileName);
+    w.close();
+    }
+   */
  }
 
  @Override

@@ -408,26 +408,26 @@ public final class FST<T> implements Accountable {
  private static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28;
 
  /** Load a previously saved FST. */
- public FST(DataInput in, Outputs<T> outputs) throws IOException {
-   this(in, outputs, new OnHeapFSTStore(DEFAULT_MAX_BLOCK_BITS));
+ public FST(DataInput metaIn, DataInput in, Outputs<T> outputs) throws IOException {
+   this(metaIn, in, outputs, new OnHeapFSTStore(DEFAULT_MAX_BLOCK_BITS));
  }
 
  /** Load a previously saved FST; maxBlockBits allows you to
  *  control the size of the byte[] pages used to hold the FST bytes. */
- public FST(DataInput in, Outputs<T> outputs, FSTStore fstStore) throws IOException {
+ public FST(DataInput metaIn, DataInput in, Outputs<T> outputs, FSTStore fstStore) throws IOException {
    bytes = null;
    this.fstStore = fstStore;
    this.outputs = outputs;
 
    // NOTE: only reads formats VERSION_START up to VERSION_CURRENT; we don't have
    // back-compat promise for FSTs (they are experimental), but we are sometimes able to offer it
-   CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT);
-   if (in.readByte() == 1) {
+   CodecUtil.checkHeader(metaIn, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT);
+   if (metaIn.readByte() == 1) {
      // accepts empty string
      // 1 KB blocks:
      BytesStore emptyBytes = new BytesStore(10);
-     int numBytes = in.readVInt();
-     emptyBytes.copyBytes(in, numBytes);
+     int numBytes = metaIn.readVInt();
+     emptyBytes.copyBytes(metaIn, numBytes);
 
      // De-serialize empty-string output:
      BytesReader reader = emptyBytes.getReverseReader();

@@ -441,7 +441,7 @@ public final class FST<T> implements Accountable {
    } else {
      emptyOutput = null;
    }
-   final byte t = in.readByte();
+   final byte t = metaIn.readByte();
    switch(t) {
      case 0:
        inputType = INPUT_TYPE.BYTE1;

@@ -455,9 +455,9 @@ public final class FST<T> implements Accountable {
      default:
        throw new CorruptIndexException("invalid input type " + t, in);
    }
-   startNode = in.readVLong();
+   startNode = metaIn.readVLong();
 
-   long numBytes = in.readVLong();
+   long numBytes = metaIn.readVLong();
    this.fstStore.init(in, numBytes);
  }
 
@@ -502,16 +502,16 @@ public final class FST<T> implements Accountable {
    }
  }
 
- public void save(DataOutput out) throws IOException {
+ public void save(DataOutput metaOut, DataOutput out) throws IOException {
    if (startNode == -1) {
      throw new IllegalStateException("call finish first");
    }
-   CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
+   CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
    // TODO: really we should encode this as an arc, arriving
    // to the root node, instead of special casing here:
    if (emptyOutput != null) {
      // Accepts empty string
-     out.writeByte((byte) 1);
+     metaOut.writeByte((byte) 1);
 
      // Serialize empty-string output:
      ByteBuffersDataOutput ros = new ByteBuffersDataOutput();

@@ -528,10 +528,10 @@ public final class FST<T> implements Accountable {
        emptyOutputBytes[emptyLen - upto - 1] = b;
        upto++;
      }
-     out.writeVInt(emptyLen);
-     out.writeBytes(emptyOutputBytes, 0, emptyLen);
+     metaOut.writeVInt(emptyLen);
+     metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
    } else {
-     out.writeByte((byte) 0);
+     metaOut.writeByte((byte) 0);
    }
    final byte t;
    if (inputType == INPUT_TYPE.BYTE1) {

@@ -541,11 +541,11 @@ public final class FST<T> implements Accountable {
    } else {
      t = 2;
    }
-   out.writeByte(t);
-   out.writeVLong(startNode);
+   metaOut.writeByte(t);
+   metaOut.writeVLong(startNode);
    if (bytes != null) {
      long numBytes = bytes.getPosition();
-     out.writeVLong(numBytes);
+     metaOut.writeVLong(numBytes);
      bytes.writeTo(out);
    } else {
      assert fstStore != null;

@@ -558,7 +558,8 @@ public final class FST<T> implements Accountable {
   */
  public void save(final Path path) throws IOException {
    try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) {
-     save(new OutputStreamDataOutput(os));
+     DataOutput out = new OutputStreamDataOutput(os);
+     save(out, out);
    }
  }
 
@@ -567,7 +568,8 @@ public final class FST<T> implements Accountable {
   */
  public static <T> FST<T> read(Path path, Outputs<T> outputs) throws IOException {
    try (InputStream is = Files.newInputStream(path)) {
-     return new FST<>(new InputStreamDataInput(new BufferedInputStream(is)), outputs);
+     DataInput in = new InputStreamDataInput(new BufferedInputStream(is));
+     return new FST<>(in, in, outputs);
    }
  }
 
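Both FST.save and the FST constructor now take two streams: one carries the FST metadata (empty-string output, input type, start node, byte count) and the other carries the FST body, which is what lets the block-tree writer put FST metadata into the .tmd file while the arcs stay in the terms index. Passing the same object twice preserves the old single-stream layout, as most call sites in this commit do. Below is a small round-trip sketch under that assumption; the class and method names are illustrative, not part of the commit.

import java.io.IOException;

import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;

final class FstRoundTripSketch {
  // Serializes an FST into memory and loads it back through the new two-stream API.
  // Handing the same object to both parameters keeps metadata and body in one stream.
  static FST<Long> roundTrip(FST<Long> fst) throws IOException {
    ByteBuffersDataOutput out = ByteBuffersDataOutput.newResettableInstance();
    fst.save(out, out);                                            // metaOut and out are the same stream here
    DataInput in = out.toDataInput();
    return new FST<>(in, in, PositiveIntOutputs.getSingleton());   // metaIn and in likewise
  }
}

ByteBuffersDataOutput.toDataInput() is used here the same way FSTDictionary uses it above when block-encoding the serialized FST.
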
@@ -119,10 +119,10 @@ public class Test2BFST extends LuceneTestCase {
        if (verify == 0) {
          System.out.println("\nTEST: save/load FST and re-verify");
          IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
-         fst.save(out);
+         fst.save(out, out);
          out.close();
          IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
-         fst = new FST<>(in, outputs);
+         fst = new FST<>(in, in, outputs);
          in.close();
        } else {
          dir.deleteFile("fst");

@@ -198,10 +198,10 @@ public class Test2BFST extends LuceneTestCase {
        if (verify == 0) {
          System.out.println("\nTEST: save/load FST and re-verify");
          IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
-         fst.save(out);
+         fst.save(out, out);
          out.close();
          IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
-         fst = new FST<>(in, outputs);
+         fst = new FST<>(in, in, outputs);
          in.close();
        } else {
          dir.deleteFile("fst");

@@ -286,10 +286,10 @@ public class Test2BFST extends LuceneTestCase {
        if (verify == 0) {
          System.out.println("\nTEST: save/load FST and re-verify");
          IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
-         fst.save(out);
+         fst.save(out, out);
          out.close();
          IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
-         fst = new FST<>(in, outputs);
+         fst = new FST<>(in, in, outputs);
          in.close();
        } else {
          dir.deleteFile("fst");

@@ -174,7 +174,7 @@ public class TestFSTDirectAddressing extends LuceneTestCase {
  private static void countFSTArcs(String fstFilePath) throws IOException {
    byte[] buf = Files.readAllBytes(Paths.get(fstFilePath));
    DataInput in = new ByteArrayDataInput(buf);
-   FST<BytesRef> fst = new FST<>(in, ByteSequenceOutputs.getSingleton());
+   FST<BytesRef> fst = new FST<>(in, in, ByteSequenceOutputs.getSingleton());
    BytesRefFSTEnum<BytesRef> fstEnum = new BytesRefFSTEnum<>(fst);
    int binarySearchArcCount = 0, directAddressingArcCount = 0, listArcCount = 0;
    while(fstEnum.next() != null) {

@@ -228,7 +228,7 @@ public class TestFSTDirectAddressing extends LuceneTestCase {
 
    System.out.println("Reading FST");
    long startTimeMs = System.currentTimeMillis();
-   FST<CharsRef> originalFst = new FST<>(in, CharSequenceOutputs.getSingleton());
+   FST<CharsRef> originalFst = new FST<>(in, in, CharSequenceOutputs.getSingleton());
    long endTimeMs = System.currentTimeMillis();
    System.out.println("time = " + (endTimeMs - startTimeMs) + " ms");
 
@@ -529,7 +529,7 @@ public class TestFSTs extends LuceneTestCase {
 
      Directory dir = FSDirectory.open(dirOut);
      IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT);
-     fst.save(out);
+     fst.save(out, out);
      out.close();
      System.out.println("Saved FST to fst.bin.");
 
@@ -1195,11 +1195,11 @@ public class TestFSTs extends LuceneTestCase {
    // Make sure it still works after save/load:
    Directory dir = newDirectory();
    IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
-   fst.save(out);
+   fst.save(out, out);
    out.close();
 
    IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
-   final FST<Long> fst2 = new FST<>(in, outputs);
+   final FST<Long> fst2 = new FST<>(in, in, outputs);
    checkStopNodes(fst2, outputs);
    in.close();
    dir.close();

@@ -779,7 +779,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 
        // Write FST to index
        indexStartFP = indexOut.getFilePointer();
-       root.index.save(indexOut);
+       root.index.save(indexOut, indexOut);
        //System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name);
 
        // if (SAVE_DOT_FILES || DEBUG) {

@@ -74,7 +74,7 @@ final class VersionFieldReader extends Terms implements Accountable {
      final IndexInput clone = indexIn.clone();
      //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
      clone.seek(indexStartFP);
-     index = new FST<>(clone, VersionBlockTreeTermsWriter.FST_OUTPUTS);
+     index = new FST<>(clone, clone, VersionBlockTreeTermsWriter.FST_OUTPUTS);
 
      /*
      if (false) {

@@ -598,7 +598,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
      return false;
    }
 
-   fst.save(output);
+   fst.save(output, output);
    output.writeVInt(maxAnalyzedPathsForOneInput);
    output.writeByte((byte) (hasPayloads ? 1 : 0));
    return true;

@@ -607,7 +607,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
  @Override
  public boolean load(DataInput input) throws IOException {
    count = input.readVLong();
-   this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
+   this.fst = new FST<>(input, input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = input.readVInt();
    hasPayloads = input.readByte() == 1;
    return true;

@@ -360,7 +360,7 @@ public class FreeTextSuggester extends Lookup implements Accountable {
    output.writeByte(separator);
    output.writeVInt(grams);
    output.writeVLong(totTokens);
-   fst.save(output);
+   fst.save(output, output);
    return true;
  }
 
@@ -378,7 +378,7 @@ public class FreeTextSuggester extends Lookup implements Accountable {
    }
    totTokens = input.readVLong();
 
-   fst = new FST<>(input, PositiveIntOutputs.getSingleton());
+   fst = new FST<>(input, input, PositiveIntOutputs.getSingleton());
 
    return true;
  }

@@ -324,11 +324,11 @@ public final class NRTSuggester implements Accountable {
      OffHeapFSTStore store = new OffHeapFSTStore();
      IndexInput clone = input.clone();
      clone.seek(input.getFilePointer());
-     fst = new FST<>(clone, new PairOutputs<>(
+     fst = new FST<>(clone, clone, new PairOutputs<>(
          PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()), store);
      input.seek(clone.getFilePointer() + store.size());
    } else {
-     fst = new FST<>(input, new PairOutputs<>(
+     fst = new FST<>(input, input, new PairOutputs<>(
          PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    }
 
@@ -123,7 +123,7 @@ final class NRTSuggesterBuilder {
    if (fst == null) {
      return false;
    }
-   fst.save(output);
+   fst.save(output, output);
 
    /* write some more meta-info */
    assert maxAnalyzedPathsPerOutput > 0;

@@ -298,7 +298,7 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
    if (normalCompletion == null || normalCompletion.getFST() == null) {
      return false;
    }
-   normalCompletion.getFST().save(output);
+   normalCompletion.getFST().save(output, output);
    return true;
  }
 
@@ -306,7 +306,7 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
  public synchronized boolean load(DataInput input) throws IOException {
    count = input.readVLong();
    this.higherWeightsCompletion = new FSTCompletion(new FST<>(
-       input, NoOutputs.getSingleton()));
+       input, input, NoOutputs.getSingleton()));
    this.normalCompletion = new FSTCompletion(
        higherWeightsCompletion.getFST(), false, exactMatchFirst);
    return true;

@@ -141,14 +141,14 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
    if (fst == null) {
      return false;
    }
-   fst.save(output);
+   fst.save(output, output);
    return true;
  }
 
  @Override
  public boolean load(DataInput input) throws IOException {
    count = input.readVLong();
-   this.fst = new FST<>(input, PositiveIntOutputs.getSingleton());
+   this.fst = new FST<>(input, input, PositiveIntOutputs.getSingleton());
    return true;
  }
 
@@ -296,11 +296,11 @@ public class FSTTester<T> {
    if (random.nextBoolean() && fst != null) {
      IOContext context = LuceneTestCase.newIOContext(random);
      IndexOutput out = dir.createOutput("fst.bin", context);
-     fst.save(out);
+     fst.save(out, out);
      out.close();
      IndexInput in = dir.openInput("fst.bin", context);
      try {
-       fst = new FST<T>(in, outputs);
+       fst = new FST<T>(in, in, outputs);
      } finally {
        in.close();
        dir.deleteFile("fst.bin");