Make FSTCompiler.compile() to only return the FSTMetadata (#12831)

* Make FSTCompiler.compile() to only return the FSTMetadata

* tidy code
This commit is contained in:
Dzung Bui 2024-02-06 00:58:56 +09:00 committed by GitHub
parent c02f5473b2
commit 63d4ba938f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
35 changed files with 149 additions and 90 deletions

View File

@ -80,6 +80,10 @@ API Changes
* GITHUB#12875: Ensure token position is always increased in PathHierarchyTokenizer and ReversePathHierarchyTokenizer
and resulting tokens do not overlap. (Michael Froh, Lukáš Vlček)
* GITHUB#12624, GITHUB#12831: Allow FSTCompiler to stream to any DataOutput while building, and
make compile() only return the FSTMetadata. For the on-heap (default) use case, please use
FST.fromFSTReader(fstMetadata, fstCompiler.getFSTReader()) to create the FST. (Anh Dung Bui)
New Features
---------------------

View File

@ -111,7 +111,7 @@ public class NormalizeCharMap {
for (Map.Entry<String, String> ent : pendingPairs.entrySet()) {
fstCompiler.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue()));
}
map = fstCompiler.compile();
map = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)

View File

@ -51,7 +51,7 @@ class ConvTable {
fstCompiler.add(scratchInts.get(), new CharsRef(entry.getValue()));
}
fst = fstCompiler.compile();
fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}

View File

@ -657,7 +657,7 @@ public class Dictionary {
}
fstCompiler.add(scratch.get(), output);
}
return fstCompiler.compile();
return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
}
/**

View File

@ -222,7 +222,8 @@ public final class StemmerOverrideFilter extends TokenFilter {
intsSpare.copyUTF8Bytes(bytesRef);
fstCompiler.add(intsSpare.get(), new BytesRef(outputValues.get(id)));
}
return new StemmerOverrideMap(fstCompiler.compile(), ignoreCase);
return new StemmerOverrideMap(
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()), ignoreCase);
}
}
}

View File

@ -291,7 +291,7 @@ public class SynonymMap {
fstCompiler.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef());
}
FST<BytesRef> fst = fstCompiler.compile();
FST<BytesRef> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
return new SynonymMap(fst, words, maxHorizontalContext);
}
}

View File

@ -126,7 +126,7 @@ class TokenInfoDictionaryBuilder {
dictionary.addMapping((int) ord, offset);
offset = next;
}
dictionary.setFST(fstCompiler.compile());
dictionary.setFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
return dictionary;
}

View File

@ -147,7 +147,9 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
segmentations.add(wordIdAndLength);
ord++;
}
this.fst = new TokenInfoFST(fstCompiler.compile(), false);
this.fst =
new TokenInfoFST(
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()), false);
this.morphAtts = new UserMorphData(data.toArray(new String[0]));
this.segmentations = segmentations.toArray(new int[segmentations.size()][]);
}

View File

@ -122,7 +122,7 @@ class TokenInfoDictionaryBuilder {
dictionary.addMapping((int) ord, offset);
offset = next;
}
dictionary.setFST(fstCompiler.compile());
dictionary.setFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
return dictionary;
}
}

View File

@ -130,7 +130,8 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
lastToken = token;
ord++;
}
this.fst = new TokenInfoFST(fstCompiler.compile());
this.fst =
new TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
int[][] segmentations = _segmentations.toArray(new int[_segmentations.size()][]);
short[] rightIds = new short[_rightIds.size()];
for (int i = 0; i < _rightIds.size(); i++) {

View File

@ -498,7 +498,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
}
}
index = fstCompiler.compile();
index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
assert subIndices == null;

View File

@ -216,7 +216,7 @@ public class BooleanPerceptronClassifier implements Classifier<Boolean> {
fstCompiler.add(
Util.toIntsRef(scratchBytes.get(), scratchInts), entry.getValue().longValue());
}
fst = fstCompiler.compile();
fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
}
@Override

View File

@ -283,7 +283,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
@Override
public void finish(long termsFilePointer) throws IOException {
fst = fstCompiler.compile();
fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
if (fst != null) {
fst.save(out, out);
}

View File

@ -425,7 +425,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
assert sumTotalTermCount == totFloorTermCount;
index = fstCompiler.compile();
index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
assert subIndices == null;
/*

View File

@ -277,7 +277,8 @@ public class FSTTermsWriter extends FieldsConsumer {
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
// save FST dict
if (numTerms > 0) {
final FST<FSTTermOutputs.TermData> fst = fstCompiler.compile();
final FST<FSTTermOutputs.TermData> fst =
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
fields.add(
new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, fst));
}

View File

@ -738,7 +738,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
}
docCount = visitedDocs.cardinality();
fst = fstCompiler.compile();
fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
/*
PrintStream ps = new PrintStream("out.dot");
fst.toDot(ps);

View File

@ -185,7 +185,8 @@ public class FSTDictionary implements IndexDictionary {
@Override
public FSTDictionary build() throws IOException {
return new FSTDictionary(fstCompiler.compile());
return new FSTDictionary(
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
}
}
}

View File

@ -587,7 +587,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
}
}
index = fstCompiler.compile();
index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
assert subIndices == null;

View File

@ -437,6 +437,21 @@ public final class FST<T> implements Accountable {
this.fstReader = fstReader;
}
/**
 * Builds an {@link FST} on top of an existing {@link FSTReader}.
 *
 * <p>When {@code fstMetadata} is null, no FST is built and null is returned; the reader is not
 * inspected in that case.
 *
 * @param fstMetadata the metadata of the FST, may be null
 * @param fstReader the FSTReader holding the FST bytes; must not be null when the metadata is
 *     non-null
 * @return the FST, or null if {@code fstMetadata} is null
 */
public static <T> FST<T> fromFSTReader(FSTMetadata<T> fstMetadata, FSTReader fstReader) {
  if (fstMetadata != null) {
    final FSTReader checkedReader =
        Objects.requireNonNull(fstReader, "FSTReader cannot be null");
    return new FST<>(fstMetadata, checkedReader);
  }
  // The metadata can be null when no node was accepted by the FST; there is nothing to wrap.
  return null;
}
/**
* Read the FST metadata from DataInput
*
@ -516,9 +531,7 @@ public final class FST<T> implements Accountable {
}
/**
* Save the FST to DataOutput. If you use an {@link org.apache.lucene.store.IndexOutput} to build
* the FST, then you should not and do not need to call this method, as the FST is already saved.
* Doing so will throw an {@link UnsupportedOperationException}.
* Save the FST to DataOutput.
*
* @param metaOut the DataOutput to write the metadata to
* @param out the DataOutput to write the FST bytes to

View File

@ -99,6 +99,7 @@ public class FSTCompiler<T> {
private static final FSTReader NULL_FST_READER = new NullFSTReader();
private final NodeHash<T> dedupHash;
// a temporary FST used during building for NodeHash cache
final FST<T> fst;
private final T NO_OUTPUT;
@ -173,9 +174,7 @@ public class FSTCompiler<T> {
paddingBytePending = true;
this.dataOutput = dataOutput;
fst =
new FST<>(
new FST.FSTMetadata<>(inputType, outputs, null, -1, version, 0),
toFSTReader(dataOutput));
new FST<>(new FST.FSTMetadata<>(inputType, outputs, null, -1, version, 0), NULL_FST_READER);
if (suffixRAMLimitMB < 0) {
throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);
} else if (suffixRAMLimitMB > 0) {
@ -193,16 +192,6 @@ public class FSTCompiler<T> {
}
}
// Resolves the FSTReader that corresponds to the given DataOutput: the DataOutput itself when it
// also implements FSTReader, otherwise the shared NULL_FST_READER placeholder. Attempting to read
// from a FST backed by that placeholder will throw UnsupportedOperationException.
private FSTReader toFSTReader(DataOutput dataOutput) {
  return dataOutput instanceof FSTReader ? (FSTReader) dataOutput : NULL_FST_READER;
}
/**
* This class is used for FST backed by non-FSTReader DataOutput. It does not allow getting the
* reverse BytesReader nor writing to a DataOutput.
@ -227,6 +216,22 @@ public class FSTCompiler<T> {
}
}
/**
 * Returns the {@link DataOutput} of this compiler viewed as a {@link FSTReader}.
 *
 * <p>This only works when the compiler writes to a DataOutput that is itself a FSTReader, i.e.
 * the default DataOutput or the one obtained from {@link #getOnHeapReaderWriter(int)}. For any
 * other DataOutput this method fails.
 *
 * @return the DataOutput as FSTReader
 * @throws IllegalStateException if the DataOutput does not implement FSTReader
 */
public FSTReader getFSTReader() {
  // Reject the unsupported case first so the happy path is a plain cast.
  if (!(dataOutput instanceof FSTReader)) {
    throw new IllegalStateException(
        "The DataOutput must implement FSTReader, but got " + dataOutput);
  }
  return (FSTReader) dataOutput;
}
/**
* Fluent-style constructor for FST {@link FSTCompiler}.
*
@ -967,10 +972,31 @@ public class FSTCompiler<T> {
return output == NO_OUTPUT || !output.equals(NO_OUTPUT);
}
/** Returns final FST. NOTE: this will return null if nothing is accepted by the FST. */
// TODO: make this method to only return the FSTMetadata and user needs to construct the FST
// themselves
public FST<T> compile() throws IOException {
/**
* Returns the metadata of the final FST. NOTE: this will return null if nothing is accepted by
* the FST.
*
* <p>To create the FST, you need to:
*
* <p>- If a FSTReader DataOutput was used, such as the one returned by {@link
* #getOnHeapReaderWriter(int)}
*
* <pre class="prettyprint">
* fstMetadata = fstCompiler.compile();
* fst = FST.fromFSTReader(fstMetadata, fstCompiler.getFSTReader());
* </pre>
*
* <p>- If a non-FSTReader DataOutput was used, such as {@link
* org.apache.lucene.store.IndexOutput}, you need to first create the corresponding {@link
* org.apache.lucene.store.DataInput}, such as {@link org.apache.lucene.store.IndexInput} then
* pass it to the FST constructor
*
* <pre class="prettyprint">
* fstMetadata = fstCompiler.compile();
* fst = new FST&lt;&gt;(fstMetadata, dataInput, new OffHeapFSTStore());
* </pre>
*/
public FST.FSTMetadata<T> compile() throws IOException {
final UnCompiledNode<T> root = frontier[0];
@ -990,7 +1016,7 @@ public class FSTCompiler<T> {
// root.output=" + root.output);
finish(compileNode(root).node);
return fst;
return fst.metadata;
}
/** Expert: holds a pending (seen but not yet serialized) arc. */

View File

@ -49,7 +49,7 @@
* scratchBytes.copyChars(inputValues[i]);
* fstCompiler.add(Util.toIntsRef(scratchBytes.toBytesRef(), scratchInts), outputValues[i]);
* }
* FST&lt;Long&gt; fst = fstCompiler.compile();
* FST&lt;Long&gt; fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
* </pre>
*
* Retrieval by key:

View File

@ -90,7 +90,7 @@ public class Test2BFST extends LuceneTestCase {
nextInput(r, ints2);
}
FST<Object> fst = fstCompiler.compile();
FST<Object> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
for (int verify = 0; verify < 2; verify++) {
System.out.println(
@ -183,7 +183,7 @@ public class Test2BFST extends LuceneTestCase {
nextInput(r, ints);
}
FST<BytesRef> fst = fstCompiler.compile();
FST<BytesRef> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
for (int verify = 0; verify < 2; verify++) {
System.out.println(
@ -273,7 +273,7 @@ public class Test2BFST extends LuceneTestCase {
nextInput(r, ints);
}
FST<Long> fst = fstCompiler.compile();
FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
for (int verify = 0; verify < 2; verify++) {

View File

@ -92,10 +92,10 @@ public class Test2BFSTOffHeap extends LuceneTestCase {
nextInput(r, ints2);
}
FST<Object> fst = fstCompiler.compile();
FST.FSTMetadata<Object> fstMetadata = fstCompiler.compile();
indexOutput.close();
try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) {
fst = new FST<>(fst.getMetadata(), indexInput, new OffHeapFSTStore());
FST<Object> fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore());
for (int verify = 0; verify < 2; verify++) {
System.out.println(
@ -180,10 +180,10 @@ public class Test2BFSTOffHeap extends LuceneTestCase {
nextInput(r, ints);
}
FST<BytesRef> fst = fstCompiler.compile();
FST.FSTMetadata<BytesRef> fstMetadata = fstCompiler.compile();
indexOutput.close();
try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) {
fst = new FST<>(fst.getMetadata(), indexInput, new OffHeapFSTStore());
FST<BytesRef> fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore());
for (int verify = 0; verify < 2; verify++) {
System.out.println(
@ -265,10 +265,10 @@ public class Test2BFSTOffHeap extends LuceneTestCase {
nextInput(r, ints);
}
FST<Long> fst = fstCompiler.compile();
FST.FSTMetadata<Long> fstMetadata = fstCompiler.compile();
indexOutput.close();
try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) {
fst = new FST<>(fst.getMetadata(), indexInput, new OffHeapFSTStore());
FST<Long> fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore());
for (int verify = 0; verify < 2; verify++) {

View File

@ -196,7 +196,7 @@ public class TestFSTDirectAddressing extends LuceneTestCase {
}
last = entry;
}
return fstCompiler.compile();
return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
}
public static void main(String... args) throws Exception {
@ -333,7 +333,7 @@ public class TestFSTDirectAddressing extends LuceneTestCase {
while ((inputOutput = fstEnum.next()) != null) {
fstCompiler.add(inputOutput.input, CharsRef.deepCopyOf(inputOutput.output));
}
return fstCompiler.compile();
return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
}
private static int walk(FST<CharsRef> read) throws IOException {

View File

@ -407,7 +407,7 @@ public class TestFSTs extends LuceneTestCase {
System.out.println(ord + " terms...");
}
}
FST<Long> fst = fstCompiler.compile();
FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
if (VERBOSE) {
System.out.println(
"FST: "
@ -569,7 +569,7 @@ public class TestFSTs extends LuceneTestCase {
System.out.println(
((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms");
FST<T> fst = fstCompiler.compile();
FST<T> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
long tEnd = System.nanoTime();
System.out.println(
((tEnd - tMid) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to finish/pack");
@ -774,7 +774,8 @@ public class TestFSTs extends LuceneTestCase {
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
fstCompiler.add(
Util.toIntsRef(newBytesRef("foobar"), new IntsRefBuilder()), outputs.getNoOutput());
final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<>(fstCompiler.compile());
final BytesRefFSTEnum<Object> fstEnum =
new BytesRefFSTEnum<>(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
assertNull(fstEnum.seekFloor(newBytesRef("foo")));
assertNull(fstEnum.seekCeil(newBytesRef("foobaz")));
}
@ -788,7 +789,7 @@ public class TestFSTs extends LuceneTestCase {
for (int i = 0; i < 10; i++) {
fstCompiler.add(Util.toIntsRef(newBytesRef(str), ints), outputs.getNoOutput());
}
FST<Object> fst = fstCompiler.compile();
FST<Object> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
// count the input paths
int count = 0;
@ -863,7 +864,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(b, new IntsRefBuilder()), 42L);
fstCompiler.add(Util.toIntsRef(c, new IntsRefBuilder()), 13824324872317238L);
final FST<Long> fst = fstCompiler.compile();
final FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
assertEquals(13824324872317238L, (long) Util.get(fst, c));
assertEquals(42, (long) Util.get(fst, b));
@ -1107,7 +1108,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(term.get(), scratchIntsRef), nothing);
}
return fstCompiler.compile();
return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
}
void generate(ArrayList<String> out, StringBuilder b, char from, char to, int depth) {
@ -1173,7 +1174,7 @@ public class TestFSTs extends LuceneTestCase {
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();
fstCompiler.add(Util.toUTF32("slat", new IntsRefBuilder()), 10L);
fstCompiler.add(Util.toUTF32("st", new IntsRefBuilder()), 17L);
final FST<Long> fst = fstCompiler.compile();
final FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
// Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp3/out.dot"));
StringWriter w = new StringWriter();
Util.toDot(fst, w, false, false);
@ -1190,7 +1191,7 @@ public class TestFSTs extends LuceneTestCase {
Util.toIntsRef(newBytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput());
fstCompiler.add(
Util.toIntsRef(newBytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput());
final FST<Long> fst = fstCompiler.compile();
final FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
StringWriter w = new StringWriter();
// Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
Util.toDot(fst, w, false, false);
@ -1216,7 +1217,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), 7L);
fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L);
FST<Long> fst = fstCompiler.compile();
FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
// save the FST to DataOutput, here it would not matter whether we are saving to different
// DataOutput for meta or not
@ -1252,7 +1253,6 @@ public class TestFSTs extends LuceneTestCase {
final Long nothing = outputs.getNoOutput();
final FSTCompiler<Long> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
final FST<Long> fst = fstCompiler.fst;
final FSTCompiler.UnCompiledNode<Long> rootNode =
new FSTCompiler.UnCompiledNode<>(fstCompiler, 0);
@ -1285,6 +1285,8 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.finish(fstCompiler.addNode(rootNode));
final FST<Long> fst = new FST<>(fstCompiler.fst.metadata, fstCompiler.getFSTReader());
StringWriter w = new StringWriter();
// Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp3/out.dot"));
Util.toDot(fst, w, false, false);
@ -1333,7 +1335,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(newBytesRef("aab"), scratch), 22L);
fstCompiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), 7L);
fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L);
final FST<Long> fst = fstCompiler.compile();
final FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
// Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
// Util.toDot(fst, w, false, false);
// w.close();
@ -1370,7 +1372,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(newBytesRef("adcde"), scratch), 17L);
fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L);
final FST<Long> fst = fstCompiler.compile();
final FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
final AtomicInteger rejectCount = new AtomicInteger();
Util.TopNSearcher<Long> searcher =
new Util.TopNSearcher<>(fst, 2, 6, minLongComparator) {
@ -1433,7 +1435,8 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(newBytesRef("aab"), scratch), outputs.newPair(22L, 57L));
fstCompiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), outputs.newPair(7L, 36L));
fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), outputs.newPair(17L, 85L));
final FST<Pair<Long, Long>> fst = fstCompiler.compile();
final FST<Pair<Long, Long>> fst =
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
// Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
// Util.toDot(fst, w, false, false);
// w.close();
@ -1492,7 +1495,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(newBytesRef(e.getKey()), scratch), e.getValue());
}
final FST<Long> fst = fstCompiler.compile();
final FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
// System.out.println("SAVE out.dot");
// Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
// Util.toDot(fst, w, false, false);
@ -1619,7 +1622,8 @@ public class TestFSTs extends LuceneTestCase {
Util.toIntsRef(newBytesRef(e.getKey()), scratch), outputs.newPair(weight, output));
}
final FST<Pair<Long, Long>> fst = fstCompiler.compile();
final FST<Pair<Long, Long>> fst =
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
// System.out.println("SAVE out.dot");
// Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
// Util.toDot(fst, w, false, false);
@ -1695,7 +1699,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(input.get(), newBytesRef(BytesRef.deepCopyOf(output)));
}
final FST<BytesRef> fst = fstCompiler.compile();
final FST<BytesRef> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
for (int arc = 0; arc < 6; arc++) {
input.setIntAt(0, arc);
final BytesRef result = Util.get(fst, input.get());
@ -1737,7 +1741,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(input.get(), term);
}
FST<BytesRef> fst = fstCompiler.compile();
FST<BytesRef> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
Arc<BytesRef> arc = new FST.Arc<>();
fst.getFirstArc(arc);
@ -1772,7 +1776,7 @@ public class TestFSTs extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(ac, new IntsRefBuilder()), 5L);
fstCompiler.add(Util.toIntsRef(bd, new IntsRefBuilder()), 7L);
FST<Long> fst = fstCompiler.compile();
FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
assertEquals(3, (long) Util.get(fst, ab));
assertEquals(5, (long) Util.get(fst, ac));

View File

@ -116,6 +116,6 @@ public class TestUtil extends LuceneTestCase {
fstCompiler.add(
Util.toIntsRef(new BytesRef(word), new IntsRefBuilder()), outputs.getNoOutput());
}
return fstCompiler.compile();
return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
}
}

View File

@ -155,7 +155,7 @@ public class KnnVectorDict implements Closeable {
while (addOneLine(in, binOut)) {
// continue;
}
fstCompiler.compile().save(fstOut, fstOut);
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()).save(fstOut, fstOut);
binOut.writeInt(numFields - 1);
}
}

View File

@ -174,7 +174,7 @@ public class TestFSTsMisc extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L);
fstCompiler.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L);
fstCompiler.add(Util.toIntsRef(new BytesRef("b"), scratch), 17L);
final FST<Object> fst = fstCompiler.compile();
final FST<Object> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
Object output = Util.get(fst, new BytesRef("a"));
assertNotNull(output);
@ -208,7 +208,7 @@ public class TestFSTsMisc extends LuceneTestCase {
fstCompiler.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L);
fstCompiler.add(Util.toIntsRef(new BytesRef("b"), scratch), 0L);
final FST<Object> fst = fstCompiler.compile();
final FST<Object> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
Object output = Util.get(fst, new BytesRef(""));
assertNotNull(output);

View File

@ -407,7 +407,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
}
}
index = fstCompiler.compile();
index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
assert subIndices == null;

View File

@ -586,7 +586,7 @@ public class AnalyzingSuggester extends Lookup {
fstCompiler.add(scratchInts.get(), outputs.newPair(cost, br));
}
}
fst = fstCompiler.compile();
fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
count = newCount;
// Util.dotToFile(fst, "/tmp/suggest.dot");

View File

@ -323,7 +323,7 @@ public class FreeTextSuggester extends Lookup {
fstCompiler.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
}
final FST<Long> newFst = fstCompiler.compile();
final FST<Long> newFst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
if (newFst == null) {
throw new IllegalArgumentException("need at least one suggestion");
}

View File

@ -104,7 +104,8 @@ final class NRTSuggesterBuilder {
* CompletionPostingsFormat.FSTLoadMode)})}
*/
public boolean store(DataOutput output) throws IOException {
final FST<PairOutputs.Pair<Long, BytesRef>> fst = fstCompiler.compile();
final FST<PairOutputs.Pair<Long, BytesRef>> fst =
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
if (fst == null) {
return false;
}

View File

@ -220,6 +220,6 @@ public class FSTCompletionBuilder {
}
}
return count == 0 ? null : fstCompiler.compile();
return count == 0 ? null : FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
}
}

View File

@ -126,7 +126,7 @@ public class WFSTCompletionLookup extends Lookup {
previous.copyBytes(scratch);
newCount++;
}
fst = fstCompiler.compile();
fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
count = newCount;
}

View File

@ -282,28 +282,33 @@ public class FSTTester<T> {
fstCompiler.add(pair.input, pair.output);
}
}
FST<T> fst = fstCompiler.compile();
FST<T> fst = null;
FST.FSTMetadata<T> fstMetadata = fstCompiler.compile();
if (useOffHeap) {
indexOutput.close();
if (fst == null) {
if (fstMetadata == null) {
dir.deleteFile("fstOffHeap.bin");
} else {
try (IndexInput in = dir.openInput("fstOffHeap.bin", IOContext.DEFAULT)) {
fst = new FST<>(fst.getMetadata(), in);
fst = new FST<>(fstMetadata, in);
} finally {
dir.deleteFile("fstOffHeap.bin");
}
}
} else if (random.nextBoolean() && fst != null) {
IOContext context = LuceneTestCase.newIOContext(random);
try (IndexOutput out = dir.createOutput("fst.bin", context)) {
fst.save(out, out);
}
try (IndexInput in = dir.openInput("fst.bin", context)) {
fst = new FST<>(FST.readMetadata(in, outputs), in);
} finally {
dir.deleteFile("fst.bin");
} else if (fstMetadata != null) {
fst = FST.fromFSTReader(fstMetadata, fstCompiler.getFSTReader());
if (random.nextBoolean()) {
IOContext context = LuceneTestCase.newIOContext(random);
try (IndexOutput out = dir.createOutput("fst.bin", context)) {
fst.save(out, out);
}
try (IndexInput in = dir.openInput("fst.bin", context)) {
fst = new FST<>(FST.readMetadata(in, outputs), in);
} finally {
dir.deleteFile("fst.bin");
}
}
}