mirror of https://github.com/apache/lucene.git
Remove FST constructors with DataInput for metadata (#12803)
* Remove FST constructor * Move Outputs to FSTMetadata
This commit is contained in:
parent
9b324a180f
commit
e04793d651
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.ja.dict;
|
package org.apache.lucene.analysis.ja.dict;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
@ -103,7 +105,7 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
|
||||||
FST<Long> fst;
|
FST<Long> fst;
|
||||||
try (InputStream is = new BufferedInputStream(fstResource.get())) {
|
try (InputStream is = new BufferedInputStream(fstResource.get())) {
|
||||||
DataInput in = new InputStreamDataInput(is);
|
DataInput in = new InputStreamDataInput(is);
|
||||||
fst = new FST<>(in, in, PositiveIntOutputs.getSingleton());
|
fst = new FST<>(readMetadata(in, PositiveIntOutputs.getSingleton()), in);
|
||||||
}
|
}
|
||||||
// TODO: some way to configure?
|
// TODO: some way to configure?
|
||||||
this.fst = new TokenInfoFST(fst, true);
|
this.fst = new TokenInfoFST(fst, true);
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.ko.dict;
|
package org.apache.lucene.analysis.ko.dict;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
@ -102,7 +104,7 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
|
||||||
FST<Long> fst;
|
FST<Long> fst;
|
||||||
try (InputStream is = new BufferedInputStream(fstResource.get())) {
|
try (InputStream is = new BufferedInputStream(fstResource.get())) {
|
||||||
DataInput in = new InputStreamDataInput(is);
|
DataInput in = new InputStreamDataInput(is);
|
||||||
fst = new FST<>(in, in, PositiveIntOutputs.getSingleton());
|
fst = new FST<>(readMetadata(in, PositiveIntOutputs.getSingleton()), in);
|
||||||
}
|
}
|
||||||
this.fst = new TokenInfoFST(fst);
|
this.fst = new TokenInfoFST(fst);
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.backward_codecs.lucene40.blocktree;
|
package org.apache.lucene.backward_codecs.lucene40.blocktree;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
@ -89,9 +91,17 @@ public final class FieldReader extends Terms {
|
||||||
final IndexInput clone = indexIn.clone();
|
final IndexInput clone = indexIn.clone();
|
||||||
clone.seek(indexStartFP);
|
clone.seek(indexStartFP);
|
||||||
if (metaIn == indexIn) { // Only true before Lucene 8.6
|
if (metaIn == indexIn) { // Only true before Lucene 8.6
|
||||||
index = new FST<>(clone, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
|
index =
|
||||||
|
new FST<>(
|
||||||
|
readMetadata(clone, ByteSequenceOutputs.getSingleton()),
|
||||||
|
clone,
|
||||||
|
new OffHeapFSTStore());
|
||||||
} else {
|
} else {
|
||||||
index = new FST<>(metaIn, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
|
index =
|
||||||
|
new FST<>(
|
||||||
|
readMetadata(metaIn, ByteSequenceOutputs.getSingleton()),
|
||||||
|
clone,
|
||||||
|
new OffHeapFSTStore());
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
if (false) {
|
if (false) {
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.codecs.blockterms;
|
package org.apache.lucene.codecs.blockterms;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -154,7 +156,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
|
||||||
public FieldIndexData(IndexInput in, FieldInfo fieldInfo, long indexStart) throws IOException {
|
public FieldIndexData(IndexInput in, FieldInfo fieldInfo, long indexStart) throws IOException {
|
||||||
IndexInput clone = in.clone();
|
IndexInput clone = in.clone();
|
||||||
clone.seek(indexStart);
|
clone.seek(indexStart);
|
||||||
fst = new FST<>(clone, clone, fstOutputs);
|
fst = new FST<>(readMetadata(clone, fstOutputs), clone);
|
||||||
clone.close();
|
clone.close();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.codecs.blocktreeords;
|
package org.apache.lucene.codecs.blocktreeords;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
|
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
@ -85,7 +87,7 @@ final class OrdsFieldReader extends Terms {
|
||||||
final IndexInput clone = indexIn.clone();
|
final IndexInput clone = indexIn.clone();
|
||||||
// System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
|
// System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
|
||||||
clone.seek(indexStartFP);
|
clone.seek(indexStartFP);
|
||||||
index = new FST<>(clone, clone, OrdsBlockTreeTermsWriter.FST_OUTPUTS);
|
index = new FST<>(readMetadata(clone, OrdsBlockTreeTermsWriter.FST_OUTPUTS), clone);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
if (true) {
|
if (true) {
|
||||||
|
|
|
@ -194,7 +194,8 @@ public class FSTTermsReader extends FieldsProducer {
|
||||||
this.sumDocFreq = sumDocFreq;
|
this.sumDocFreq = sumDocFreq;
|
||||||
this.docCount = docCount;
|
this.docCount = docCount;
|
||||||
OffHeapFSTStore offHeapFSTStore = new OffHeapFSTStore();
|
OffHeapFSTStore offHeapFSTStore = new OffHeapFSTStore();
|
||||||
this.dict = new FST<>(in, in, new FSTTermOutputs(fieldInfo), offHeapFSTStore);
|
FSTTermOutputs outputs = new FSTTermOutputs(fieldInfo);
|
||||||
|
this.dict = new FST<>(FST.readMetadata(in, outputs), in, offHeapFSTStore);
|
||||||
in.skipBytes(offHeapFSTStore.size());
|
in.skipBytes(offHeapFSTStore.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -89,10 +89,11 @@ public class FSTDictionary implements IndexDictionary {
|
||||||
isFSTOnHeap = true;
|
isFSTOnHeap = true;
|
||||||
}
|
}
|
||||||
PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
|
PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
|
||||||
|
FST.FSTMetadata<Long> metadata = FST.readMetadata(fstDataInput, fstOutputs);
|
||||||
FST<Long> fst =
|
FST<Long> fst =
|
||||||
isFSTOnHeap
|
isFSTOnHeap
|
||||||
? new FST<>(fstDataInput, fstDataInput, fstOutputs)
|
? new FST<>(metadata, fstDataInput)
|
||||||
: new FST<>(fstDataInput, fstDataInput, fstOutputs, new OffHeapFSTStore());
|
: new FST<>(metadata, fstDataInput, new OffHeapFSTStore());
|
||||||
return new FSTDictionary(fst);
|
return new FSTDictionary(fst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,11 @@ public final class FieldReader extends Terms {
|
||||||
// Initialize FST always off-heap.
|
// Initialize FST always off-heap.
|
||||||
final IndexInput clone = indexIn.clone();
|
final IndexInput clone = indexIn.clone();
|
||||||
clone.seek(indexStartFP);
|
clone.seek(indexStartFP);
|
||||||
index = new FST<>(metaIn, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
|
index =
|
||||||
|
new FST<>(
|
||||||
|
FST.readMetadata(metaIn, ByteSequenceOutputs.getSingleton()),
|
||||||
|
clone,
|
||||||
|
new OffHeapFSTStore());
|
||||||
/*
|
/*
|
||||||
if (false) {
|
if (false) {
|
||||||
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
|
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
|
||||||
|
|
|
@ -404,18 +404,8 @@ public final class FST<T> implements Accountable {
|
||||||
* Load a previously saved FST with a DataInput for metadata using an {@link OnHeapFSTStore} with
|
* Load a previously saved FST with a DataInput for metadata using an {@link OnHeapFSTStore} with
|
||||||
* maxBlockBits set to {@link #DEFAULT_MAX_BLOCK_BITS}
|
* maxBlockBits set to {@link #DEFAULT_MAX_BLOCK_BITS}
|
||||||
*/
|
*/
|
||||||
public FST(DataInput metaIn, DataInput in, Outputs<T> outputs) throws IOException {
|
public FST(FSTMetadata<T> metadata, DataInput in) throws IOException {
|
||||||
this(metaIn, in, outputs, new OnHeapFSTStore(DEFAULT_MAX_BLOCK_BITS));
|
this(metadata, in, new OnHeapFSTStore(DEFAULT_MAX_BLOCK_BITS));
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Load a previously saved FST with a DataInput for metadata and an FSTStore. If using {@link
|
|
||||||
* OnHeapFSTStore}, setting maxBlockBits allows you to control the size of the byte[] pages used
|
|
||||||
* to hold the FST bytes.
|
|
||||||
*/
|
|
||||||
public FST(DataInput metaIn, DataInput in, Outputs<T> outputs, FSTStore fstStore)
|
|
||||||
throws IOException {
|
|
||||||
this(readMetadata(metaIn, outputs), in, outputs, fstStore);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -423,15 +413,14 @@ public final class FST<T> implements Accountable {
|
||||||
* OnHeapFSTStore}, setting maxBlockBits allows you to control the size of the byte[] pages used
|
* OnHeapFSTStore}, setting maxBlockBits allows you to control the size of the byte[] pages used
|
||||||
* to hold the FST bytes.
|
* to hold the FST bytes.
|
||||||
*/
|
*/
|
||||||
public FST(FSTMetadata<T> metadata, DataInput in, Outputs<T> outputs, FSTStore fstStore)
|
public FST(FSTMetadata<T> metadata, DataInput in, FSTStore fstStore) throws IOException {
|
||||||
throws IOException {
|
this(metadata, fstStore.init(in, metadata.numBytes));
|
||||||
this(metadata, outputs, fstStore.init(in, metadata.numBytes));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Create the FST with a metadata object and a FSTReader. */
|
/** Create the FST with a metadata object and a FSTReader. */
|
||||||
FST(FSTMetadata<T> metadata, Outputs<T> outputs, FSTReader fstReader) {
|
FST(FSTMetadata<T> metadata, FSTReader fstReader) {
|
||||||
this.metadata = metadata;
|
this.metadata = metadata;
|
||||||
this.outputs = outputs;
|
this.outputs = metadata.outputs;
|
||||||
this.fstReader = fstReader;
|
this.fstReader = fstReader;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -486,7 +475,7 @@ public final class FST<T> implements Accountable {
|
||||||
}
|
}
|
||||||
long startNode = metaIn.readVLong();
|
long startNode = metaIn.readVLong();
|
||||||
long numBytes = metaIn.readVLong();
|
long numBytes = metaIn.readVLong();
|
||||||
return new FSTMetadata<>(inputType, emptyOutput, startNode, version, numBytes);
|
return new FSTMetadata<>(inputType, outputs, emptyOutput, startNode, version, numBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -574,7 +563,7 @@ public final class FST<T> implements Accountable {
|
||||||
public static <T> FST<T> read(Path path, Outputs<T> outputs) throws IOException {
|
public static <T> FST<T> read(Path path, Outputs<T> outputs) throws IOException {
|
||||||
try (InputStream is = Files.newInputStream(path)) {
|
try (InputStream is = Files.newInputStream(path)) {
|
||||||
DataInput in = new InputStreamDataInput(new BufferedInputStream(is));
|
DataInput in = new InputStreamDataInput(new BufferedInputStream(is));
|
||||||
return new FST<>(in, in, outputs);
|
return new FST<>(readMetadata(in, outputs), in);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1202,6 +1191,7 @@ public final class FST<T> implements Accountable {
|
||||||
*/
|
*/
|
||||||
public static final class FSTMetadata<T> {
|
public static final class FSTMetadata<T> {
|
||||||
final INPUT_TYPE inputType;
|
final INPUT_TYPE inputType;
|
||||||
|
final Outputs<T> outputs;
|
||||||
final int version;
|
final int version;
|
||||||
// if non-null, this FST accepts the empty string and
|
// if non-null, this FST accepts the empty string and
|
||||||
// produces this output
|
// produces this output
|
||||||
|
@ -1210,8 +1200,14 @@ public final class FST<T> implements Accountable {
|
||||||
long numBytes;
|
long numBytes;
|
||||||
|
|
||||||
public FSTMetadata(
|
public FSTMetadata(
|
||||||
INPUT_TYPE inputType, T emptyOutput, long startNode, int version, long numBytes) {
|
INPUT_TYPE inputType,
|
||||||
|
Outputs<T> outputs,
|
||||||
|
T emptyOutput,
|
||||||
|
long startNode,
|
||||||
|
int version,
|
||||||
|
long numBytes) {
|
||||||
this.inputType = inputType;
|
this.inputType = inputType;
|
||||||
|
this.outputs = outputs;
|
||||||
this.emptyOutput = emptyOutput;
|
this.emptyOutput = emptyOutput;
|
||||||
this.startNode = startNode;
|
this.startNode = startNode;
|
||||||
this.version = version;
|
this.version = version;
|
||||||
|
|
|
@ -135,7 +135,7 @@ public class FSTCompiler<T> {
|
||||||
// pad: ensure no node gets address 0 which is reserved to mean
|
// pad: ensure no node gets address 0 which is reserved to mean
|
||||||
// the stop state w/ no arcs
|
// the stop state w/ no arcs
|
||||||
bytes.writeByte((byte) 0);
|
bytes.writeByte((byte) 0);
|
||||||
fst = new FST<>(new FST.FSTMetadata<>(inputType, null, -1, VERSION_CURRENT, 0), outputs, bytes);
|
fst = new FST<>(new FST.FSTMetadata<>(inputType, outputs, null, -1, VERSION_CURRENT, 0), bytes);
|
||||||
if (suffixRAMLimitMB < 0) {
|
if (suffixRAMLimitMB < 0) {
|
||||||
throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);
|
throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);
|
||||||
} else if (suffixRAMLimitMB > 0) {
|
} else if (suffixRAMLimitMB > 0) {
|
||||||
|
@ -702,21 +702,6 @@ public class FSTCompiler<T> {
|
||||||
* IntSequenceOutputs}) then you cannot reuse across calls.
|
* IntSequenceOutputs}) then you cannot reuse across calls.
|
||||||
*/
|
*/
|
||||||
public void add(IntsRef input, T output) throws IOException {
|
public void add(IntsRef input, T output) throws IOException {
|
||||||
/*
|
|
||||||
if (DEBUG) {
|
|
||||||
BytesRef b = new BytesRef(input.length);
|
|
||||||
for(int x=0;x<input.length;x++) {
|
|
||||||
b.bytes[x] = (byte) input.ints[x];
|
|
||||||
}
|
|
||||||
b.length = input.length;
|
|
||||||
if (output == NO_OUTPUT) {
|
|
||||||
System.out.println("\nFST ADD: input=" + toString(b) + " " + b);
|
|
||||||
} else {
|
|
||||||
System.out.println("\nFST ADD: input=" + toString(b) + " " + b + " output=" + fst.outputs.outputToString(output));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
// De-dup NO_OUTPUT since it must be a singleton:
|
// De-dup NO_OUTPUT since it must be a singleton:
|
||||||
if (output.equals(NO_OUTPUT)) {
|
if (output.equals(NO_OUTPUT)) {
|
||||||
output = NO_OUTPUT;
|
output = NO_OUTPUT;
|
||||||
|
|
|
@ -141,7 +141,7 @@ public class Test2BFST extends LuceneTestCase {
|
||||||
fst.save(out, out);
|
fst.save(out, out);
|
||||||
out.close();
|
out.close();
|
||||||
IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
|
IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
|
||||||
fst = new FST<>(in, in, outputs);
|
fst = new FST<>(FST.readMetadata(in, outputs), in);
|
||||||
in.close();
|
in.close();
|
||||||
} else {
|
} else {
|
||||||
dir.deleteFile("fst");
|
dir.deleteFile("fst");
|
||||||
|
@ -228,7 +228,7 @@ public class Test2BFST extends LuceneTestCase {
|
||||||
fst.save(out, out);
|
fst.save(out, out);
|
||||||
out.close();
|
out.close();
|
||||||
IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
|
IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
|
||||||
fst = new FST<>(in, in, outputs);
|
fst = new FST<>(FST.readMetadata(in, outputs), in);
|
||||||
in.close();
|
in.close();
|
||||||
} else {
|
} else {
|
||||||
dir.deleteFile("fst");
|
dir.deleteFile("fst");
|
||||||
|
@ -320,7 +320,7 @@ public class Test2BFST extends LuceneTestCase {
|
||||||
fst.save(out, out);
|
fst.save(out, out);
|
||||||
out.close();
|
out.close();
|
||||||
IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
|
IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
|
||||||
fst = new FST<>(in, in, outputs);
|
fst = new FST<>(FST.readMetadata(in, outputs), in);
|
||||||
in.close();
|
in.close();
|
||||||
} else {
|
} else {
|
||||||
dir.deleteFile("fst");
|
dir.deleteFile("fst");
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.util.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
@ -219,7 +221,7 @@ public class TestFSTDirectAddressing extends LuceneTestCase {
|
||||||
private static void countFSTArcs(String fstFilePath) throws IOException {
|
private static void countFSTArcs(String fstFilePath) throws IOException {
|
||||||
byte[] buf = Files.readAllBytes(Paths.get(fstFilePath));
|
byte[] buf = Files.readAllBytes(Paths.get(fstFilePath));
|
||||||
DataInput in = new ByteArrayDataInput(buf);
|
DataInput in = new ByteArrayDataInput(buf);
|
||||||
FST<BytesRef> fst = new FST<>(in, in, ByteSequenceOutputs.getSingleton());
|
FST<BytesRef> fst = new FST<>(readMetadata(in, ByteSequenceOutputs.getSingleton()), in);
|
||||||
BytesRefFSTEnum<BytesRef> fstEnum = new BytesRefFSTEnum<>(fst);
|
BytesRefFSTEnum<BytesRef> fstEnum = new BytesRefFSTEnum<>(fst);
|
||||||
int binarySearchArcCount = 0,
|
int binarySearchArcCount = 0,
|
||||||
directAddressingArcCount = 0,
|
directAddressingArcCount = 0,
|
||||||
|
@ -286,7 +288,8 @@ public class TestFSTDirectAddressing extends LuceneTestCase {
|
||||||
|
|
||||||
System.out.println("Reading FST");
|
System.out.println("Reading FST");
|
||||||
long startTimeMs = System.nanoTime();
|
long startTimeMs = System.nanoTime();
|
||||||
FST<CharsRef> originalFst = new FST<>(in, in, CharSequenceOutputs.getSingleton());
|
FST<CharsRef> originalFst =
|
||||||
|
new FST<>(readMetadata(in, CharSequenceOutputs.getSingleton()), in);
|
||||||
long endTimeMs = System.nanoTime();
|
long endTimeMs = System.nanoTime();
|
||||||
System.out.println(
|
System.out.println(
|
||||||
"time = " + TimeUnit.NANOSECONDS.toMillis(endTimeMs - startTimeMs) + " ms");
|
"time = " + TimeUnit.NANOSECONDS.toMillis(endTimeMs - startTimeMs) + " ms");
|
||||||
|
|
|
@ -1226,7 +1226,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
// load the FST, which will force it to use FSTStore instead of BytesStore
|
// load the FST, which will force it to use FSTStore instead of BytesStore
|
||||||
ByteArrayDataInput in = new ByteArrayDataInput(outOS.toByteArray());
|
ByteArrayDataInput in = new ByteArrayDataInput(outOS.toByteArray());
|
||||||
FST<Long> loadedFST = new FST<>(in, in, outputs);
|
FST<Long> loadedFST = new FST<>(FST.readMetadata(in, outputs), in);
|
||||||
|
|
||||||
// now save the FST again, this time to different DataOutput for meta
|
// now save the FST again, this time to different DataOutput for meta
|
||||||
ByteArrayOutputStream metdataOS = new ByteArrayOutputStream();
|
ByteArrayOutputStream metdataOS = new ByteArrayOutputStream();
|
||||||
|
@ -1238,7 +1238,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
// finally load it again
|
// finally load it again
|
||||||
ByteArrayDataInput metaIn = new ByteArrayDataInput(metdataOS.toByteArray());
|
ByteArrayDataInput metaIn = new ByteArrayDataInput(metdataOS.toByteArray());
|
||||||
ByteArrayDataInput dataIn = new ByteArrayDataInput(dataOS.toByteArray());
|
ByteArrayDataInput dataIn = new ByteArrayDataInput(dataOS.toByteArray());
|
||||||
loadedFST = new FST<>(metaIn, dataIn, outputs);
|
loadedFST = new FST<>(FST.readMetadata(metaIn, outputs), dataIn);
|
||||||
|
|
||||||
assertEquals(22L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("aab"), scratch)).longValue());
|
assertEquals(22L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("aab"), scratch)).longValue());
|
||||||
assertEquals(7L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("aac"), scratch)).longValue());
|
assertEquals(7L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("aac"), scratch)).longValue());
|
||||||
|
@ -1299,7 +1299,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
out.close();
|
out.close();
|
||||||
|
|
||||||
IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
|
IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
|
||||||
final FST<Long> fst2 = new FST<>(in, in, outputs);
|
final FST<Long> fst2 = new FST<>(FST.readMetadata(in, outputs), in);
|
||||||
checkStopNodes(fst2, outputs);
|
checkStopNodes(fst2, outputs);
|
||||||
in.close();
|
in.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.demo.knn;
|
package org.apache.lucene.demo.knn;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -58,7 +60,7 @@ public class KnnVectorDict implements Closeable {
|
||||||
*/
|
*/
|
||||||
public KnnVectorDict(Directory directory, String dictName) throws IOException {
|
public KnnVectorDict(Directory directory, String dictName) throws IOException {
|
||||||
try (IndexInput fstIn = directory.openInput(dictName + ".fst", IOContext.READ)) {
|
try (IndexInput fstIn = directory.openInput(dictName + ".fst", IOContext.READ)) {
|
||||||
fst = new FST<>(fstIn, fstIn, PositiveIntOutputs.getSingleton());
|
fst = new FST<>(readMetadata(fstIn, PositiveIntOutputs.getSingleton()), fstIn);
|
||||||
}
|
}
|
||||||
|
|
||||||
vectors = directory.openInput(dictName + ".bin", IOContext.READ);
|
vectors = directory.openInput(dictName + ".bin", IOContext.READ);
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.sandbox.codecs.idversion;
|
package org.apache.lucene.sandbox.codecs.idversion;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
@ -86,7 +88,7 @@ final class VersionFieldReader extends Terms {
|
||||||
final IndexInput clone = indexIn.clone();
|
final IndexInput clone = indexIn.clone();
|
||||||
// System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
|
// System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
|
||||||
clone.seek(indexStartFP);
|
clone.seek(indexStartFP);
|
||||||
index = new FST<>(clone, clone, VersionBlockTreeTermsWriter.FST_OUTPUTS);
|
index = new FST<>(readMetadata(clone, VersionBlockTreeTermsWriter.FST_OUTPUTS), clone);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
if (false) {
|
if (false) {
|
||||||
|
|
|
@ -612,12 +612,9 @@ public class AnalyzingSuggester extends Lookup {
|
||||||
@Override
|
@Override
|
||||||
public boolean load(DataInput input) throws IOException {
|
public boolean load(DataInput input) throws IOException {
|
||||||
count = input.readVLong();
|
count = input.readVLong();
|
||||||
this.fst =
|
PairOutputs<Long, BytesRef> outputs =
|
||||||
new FST<>(
|
new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
|
||||||
input,
|
this.fst = new FST<>(FST.readMetadata(input, outputs), input);
|
||||||
input,
|
|
||||||
new PairOutputs<>(
|
|
||||||
PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
|
|
||||||
maxAnalyzedPathsForOneInput = input.readVInt();
|
maxAnalyzedPathsForOneInput = input.readVInt();
|
||||||
hasPayloads = input.readByte() == 1;
|
hasPayloads = input.readByte() == 1;
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.lucene.search.suggest.analyzing;
|
||||||
// - test w/ syns
|
// - test w/ syns
|
||||||
// - add pruning of low-freq ngrams?
|
// - add pruning of low-freq ngrams?
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
@ -384,7 +386,7 @@ public class FreeTextSuggester extends Lookup {
|
||||||
}
|
}
|
||||||
totTokens = input.readVLong();
|
totTokens = input.readVLong();
|
||||||
|
|
||||||
fst = new FST<>(input, input, PositiveIntOutputs.getSingleton());
|
fst = new FST<>(readMetadata(input, PositiveIntOutputs.getSingleton()), input);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -337,25 +337,16 @@ public final class NRTSuggester implements Accountable {
|
||||||
*/
|
*/
|
||||||
public static NRTSuggester load(IndexInput input, FSTLoadMode fstLoadMode) throws IOException {
|
public static NRTSuggester load(IndexInput input, FSTLoadMode fstLoadMode) throws IOException {
|
||||||
final FST<Pair<Long, BytesRef>> fst;
|
final FST<Pair<Long, BytesRef>> fst;
|
||||||
|
PairOutputs<Long, BytesRef> outputs =
|
||||||
|
new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
|
||||||
if (shouldLoadFSTOffHeap(input, fstLoadMode)) {
|
if (shouldLoadFSTOffHeap(input, fstLoadMode)) {
|
||||||
OffHeapFSTStore store = new OffHeapFSTStore();
|
OffHeapFSTStore store = new OffHeapFSTStore();
|
||||||
IndexInput clone = input.clone();
|
IndexInput clone = input.clone();
|
||||||
clone.seek(input.getFilePointer());
|
clone.seek(input.getFilePointer());
|
||||||
fst =
|
fst = new FST<>(FST.readMetadata(clone, outputs), clone, store);
|
||||||
new FST<>(
|
|
||||||
clone,
|
|
||||||
clone,
|
|
||||||
new PairOutputs<>(
|
|
||||||
PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()),
|
|
||||||
store);
|
|
||||||
input.seek(clone.getFilePointer() + store.size());
|
input.seek(clone.getFilePointer() + store.size());
|
||||||
} else {
|
} else {
|
||||||
fst =
|
fst = new FST<>(FST.readMetadata(input, outputs), input);
|
||||||
new FST<>(
|
|
||||||
input,
|
|
||||||
input,
|
|
||||||
new PairOutputs<>(
|
|
||||||
PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* read some meta info */
|
/* read some meta info */
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.search.suggest.fst;
|
package org.apache.lucene.search.suggest.fst;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -301,7 +303,7 @@ public class FSTCompletionLookup extends Lookup {
|
||||||
public synchronized boolean load(DataInput input) throws IOException {
|
public synchronized boolean load(DataInput input) throws IOException {
|
||||||
count = input.readVLong();
|
count = input.readVLong();
|
||||||
this.higherWeightsCompletion =
|
this.higherWeightsCompletion =
|
||||||
new FSTCompletion(new FST<>(input, input, NoOutputs.getSingleton()));
|
new FSTCompletion(new FST<>(readMetadata(input, NoOutputs.getSingleton()), input));
|
||||||
this.normalCompletion =
|
this.normalCompletion =
|
||||||
new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);
|
new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.search.suggest.fst;
|
package org.apache.lucene.search.suggest.fst;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -141,7 +143,7 @@ public class WFSTCompletionLookup extends Lookup {
|
||||||
@Override
|
@Override
|
||||||
public boolean load(DataInput input) throws IOException {
|
public boolean load(DataInput input) throws IOException {
|
||||||
count = input.readVLong();
|
count = input.readVLong();
|
||||||
this.fst = new FST<>(input, input, PositiveIntOutputs.getSingleton());
|
this.fst = new FST<>(readMetadata(input, PositiveIntOutputs.getSingleton()), input);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -281,7 +281,7 @@ public class FSTTester<T> {
|
||||||
fst.save(out, out);
|
fst.save(out, out);
|
||||||
}
|
}
|
||||||
try (IndexInput in = dir.openInput("fst.bin", context)) {
|
try (IndexInput in = dir.openInput("fst.bin", context)) {
|
||||||
fst = new FST<>(in, in, outputs);
|
fst = new FST<>(FST.readMetadata(in, outputs), in);
|
||||||
} finally {
|
} finally {
|
||||||
dir.deleteFile("fst.bin");
|
dir.deleteFile("fst.bin");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue