diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0f11eccdfef..58b554481a9 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -77,6 +77,10 @@ New Features
(missing the term, weight or payload). (Areek Zillur via
Mike McCandless)
+* LUCENE-5404: Add .getCount method to all suggesters (Lookup); persist count
+ metadata on .store(); Dictionary returns InputIterator; Dictionary.getWordsIterator
+ renamed to .getEntryIterator. (Areek Zillur)
+
* SOLR-1871: The RangeMapFloatFunction accepts an arbitrary ValueSource
as target and default values. (Chris Harris, shalin)
@@ -386,7 +390,7 @@ New Features
* LUCENE-5323: Add .sizeInBytes method to all suggesters (Lookup).
(Areek Zillur via Mike McCandless)
-
+
* LUCENE-5312: Add BlockJoinSorter, a new Sorter implementation that makes sure
to never split up blocks of documents indexed with IndexWriter.addDocuments.
(Adrien Grand)
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java
index d1ed4e732a6..59194b84fd7 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java
@@ -17,19 +17,20 @@ package org.apache.lucene.search.spell;
*/
import java.io.IOException;
-import org.apache.lucene.util.BytesRefIterator;
+
+import org.apache.lucene.search.suggest.InputIterator;
/**
* A simple interface representing a Dictionary. A Dictionary
- * here is just a list of words.
+ * here is a list of entries, where every entry consists of
+ * term, weight and payload.
*
- *
*/
public interface Dictionary {
/**
- * Return all words present in the dictionary
+ * Returns an iterator over all the entries
* @return Iterator
*/
- BytesRefIterator getWordsIterator() throws IOException;
+ InputIterator getEntryIterator() throws IOException;
}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
index 826ba28a73c..0150ed18f5b 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
@@ -56,7 +56,7 @@ public class HighFrequencyDictionary implements Dictionary {
}
@Override
- public final BytesRefIterator getWordsIterator() throws IOException {
+ public final InputIterator getEntryIterator() throws IOException {
return new HighFrequencyIterator();
}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
index b5d66279d6c..d55e5bbc5d1 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search.spell;
*/
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
@@ -42,12 +43,12 @@ public class LuceneDictionary implements Dictionary {
}
@Override
- public final BytesRefIterator getWordsIterator() throws IOException {
+ public final InputIterator getEntryIterator() throws IOException {
final Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
- return terms.iterator(null);
+ return new InputIterator.InputIteratorWrapper(terms.iterator(null));
} else {
- return BytesRefIterator.EMPTY;
+ return InputIterator.EMPTY;
}
}
}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
index 7071ff7cb28..5e77021af79 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
@@ -66,8 +67,8 @@ public class PlainTextDictionary implements Dictionary {
}
@Override
- public BytesRefIterator getWordsIterator() throws IOException {
- return new FileIterator();
+ public InputIterator getEntryIterator() throws IOException {
+ return new InputIterator.InputIteratorWrapper(new FileIterator());
}
final class FileIterator implements BytesRefIterator {
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
index 6f5f399ef9a..e61a2879114 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
@@ -512,7 +512,7 @@ public class SpellChecker implements java.io.Closeable {
boolean isEmpty = termsEnums.isEmpty();
try {
- BytesRefIterator iter = dict.getWordsIterator();
+ BytesRefIterator iter = dict.getEntryIterator();
BytesRef currentTerm;
terms: while ((currentTerm = iter.next()) != null) {
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
index d948e20edb8..91fdf24ff56 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
@@ -86,7 +86,7 @@ public class DocumentDictionary implements Dictionary {
}
@Override
- public BytesRefIterator getWordsIterator() throws IOException {
+ public InputIterator getEntryIterator() throws IOException {
return new DocumentInputIterator(payloadField!=null);
}
@@ -102,7 +102,6 @@ public class DocumentDictionary implements Dictionary {
private BytesRef currentPayload = null;
private final NumericDocValues weightValues;
-
/**
* Creates an iterator over term, weight and payload fields from the lucene
* index. setting <code>withPayload</code> to false, implies an iterator
@@ -150,7 +149,7 @@ public class DocumentDictionary implements Dictionary {
currentPayload = tempPayload;
currentWeight = getWeight(doc, currentDocId);
-
+
return tempTerm;
}
return null;
@@ -165,7 +164,7 @@ public class DocumentDictionary implements Dictionary {
public boolean hasPayloads() {
return hasPayloads;
}
-
+
/**
* Returns the value of the <code>weightField</code> for the current document.
* Retrieves the value for the <code>weightField</code> if its stored (using <code>doc</code>)
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
index cbc5763b6bd..d5f720e9ec6 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
@@ -92,7 +92,7 @@ public class DocumentValueSourceDictionary extends DocumentDictionary {
}
@Override
- public BytesRefIterator getWordsIterator() throws IOException {
+ public InputIterator getEntryIterator() throws IOException {
return new DocumentValueSourceInputIterator(payloadField!=null);
}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
index 5e5968574d0..28921be46ff 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
@@ -106,7 +106,7 @@ public class FileDictionary implements Dictionary {
}
@Override
- public InputIterator getWordsIterator() {
+ public InputIterator getEntryIterator() {
try {
return new FileIterator();
} catch (IOException e) {
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java
index bda13321012..c98825d4fd6 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java
@@ -44,6 +44,9 @@ public interface InputIterator extends BytesRefIterator {
/** Returns true if the iterator has payloads */
public boolean hasPayloads();
+ /** Singleton InputIterator that iterates over 0 BytesRefs. */
+ public static final InputIterator EMPTY = new InputIteratorWrapper(BytesRefIterator.EMPTY);
+
/**
* Wraps a BytesRefIterator as a suggester InputIterator, with all weights
* set to <code>1</code> and carries no payload
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
index 3b4e09ce4fa..8662d54aec3 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
@@ -24,8 +24,12 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PriorityQueue;
/**
@@ -33,6 +37,7 @@ import org.apache.lucene.util.PriorityQueue;
* @lucene.experimental
*/
public abstract class Lookup {
+
/**
* Result of a lookup.
*/
@@ -157,21 +162,46 @@ public abstract class Lookup {
* {@link UnsortedInputIterator} in such case.
*/
public void build(Dictionary dict) throws IOException {
- BytesRefIterator it = dict.getWordsIterator();
- InputIterator tfit;
- if (it instanceof InputIterator) {
- tfit = (InputIterator)it;
- } else {
- tfit = new InputIterator.InputIteratorWrapper(it);
- }
- build(tfit);
+ build(dict.getEntryIterator());
}
+ /**
+ * Calls {@link #load(DataInput)} after converting
+ * {@link InputStream} to {@link DataInput}. The stream is always closed afterwards.
+ */
+ public boolean load(InputStream input) throws IOException {
+ DataInput dataIn = new InputStreamDataInput(input);
+ try {
+ return load(dataIn);
+ } finally {
+ IOUtils.close(input);
+ }
+ }
+
+ /**
+ * Calls {@link #store(DataOutput)} after converting
+ * {@link OutputStream} to {@link DataOutput}. The stream is always closed afterwards.
+ */
+ public boolean store(OutputStream output) throws IOException {
+ DataOutput dataOut = new OutputStreamDataOutput(output);
+ try {
+ return store(dataOut);
+ } finally {
+ IOUtils.close(output);
+ }
+ }
+
+ /**
+ * Get the number of entries the lookup was built with
+ * @return total number of suggester entries
+ */
+ public abstract long getCount();
+
/**
* Builds up a new internal {@link Lookup} representation based on the given {@link InputIterator}.
* The implementation might re-sort the data internally.
*/
- public abstract void build(InputIterator tfit) throws IOException;
+ public abstract void build(InputIterator inputIterator) throws IOException;
/**
* Look up a key and return possible completion for this key.
@@ -183,23 +213,22 @@ public abstract class Lookup {
*/
public abstract List lookup(CharSequence key, boolean onlyMorePopular, int num);
-
/**
* Persist the constructed lookup data to a directory. Optional operation.
- * @param output {@link OutputStream} to write the data to.
+ * @param output {@link DataOutput} to write the data to.
* @return true if successful, false if unsuccessful or not supported.
* @throws IOException when fatal IO error occurs.
*/
- public abstract boolean store(OutputStream output) throws IOException;
+ public abstract boolean store(DataOutput output) throws IOException;
/**
* Discard current lookup data and load it from a previously saved copy.
* Optional operation.
- * @param input the {@link InputStream} to load the lookup data.
+ * @param input the {@link DataInput} to load the lookup data.
* @return true if completed successfully, false if unsuccessful or not supported.
* @throws IOException when fatal IO error occurs.
*/
- public abstract boolean load(InputStream input) throws IOException;
+ public abstract boolean load(DataInput input) throws IOException;
/**
* Get the size of the underlying lookup implementation in memory
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
index 0cf421278df..164819b7918 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
@@ -20,8 +20,6 @@ package org.apache.lucene.search.suggest.analyzing;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
@@ -68,6 +66,8 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
@@ -107,6 +107,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
private final File indexPath;
final int minPrefixChars;
private Directory dir;
+ /** Number of entries the lookup was built with */
+ private long count = 0;
/** {@link IndexSearcher} used for lookups. */
protected IndexSearcher searcher;
@@ -154,12 +156,14 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
if (DirectoryReader.indexExists(dir)) {
// Already built; open it:
- searcher = new IndexSearcher(DirectoryReader.open(dir));
+ IndexReader reader = DirectoryReader.open(dir);
+ searcher = new IndexSearcher(reader);
// This will just be null if app didn't pass payloads to build():
// TODO: maybe just stored fields? they compress...
payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");
weightsDV = MultiDocValues.getNumericValues(searcher.getIndexReader(), "weight");
textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME);
+ count = reader.numDocs();
assert textDV != null;
}
}
@@ -194,6 +198,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
IndexWriter w2 = null;
AtomicReader r = null;
boolean success = false;
+ count = 0;
try {
Analyzer gramAnalyzer = new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
@Override
@@ -239,7 +244,6 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
} else {
payloadField = null;
}
-
//long t0 = System.nanoTime();
while ((text = iter.next()) != null) {
String textString = text.utf8ToString();
@@ -251,6 +255,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
payloadField.setBytesValue(iter.payload());
}
w.addDocument(doc);
+ count++;
}
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
@@ -612,12 +617,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
}
@Override
- public boolean store(OutputStream out) {
+ public boolean store(DataOutput in) throws IOException {
return false;
}
@Override
- public boolean load(InputStream out) {
+ public boolean load(DataInput out) throws IOException {
return false;
}
@@ -637,4 +642,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
public long sizeInBytes() {
return RamUsageEstimator.sizeOf(this);
}
+
+ @Override
+ public long getCount() {
+ return count;
+ }
};
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
index 4278440e219..6b2c1f6bbe1 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
@@ -19,8 +19,6 @@ package org.apache.lucene.search.suggest.analyzing;
import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@@ -38,8 +36,6 @@ import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.InputStreamDataInput;
-import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
@@ -187,6 +183,9 @@ public class AnalyzingSuggester extends Lookup {
/** Whether position holes should appear in the automaton. */
private boolean preservePositionIncrements;
+ /** Number of entries the lookup was built with */
+ private long count = 0;
+
/**
* Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)
* AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
@@ -394,6 +393,7 @@ public class AnalyzingSuggester extends Lookup {
TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
boolean success = false;
+ count = 0;
byte buffer[] = new byte[8];
try {
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
@@ -458,6 +458,7 @@ public class AnalyzingSuggester extends Lookup {
writer.write(buffer, 0, output.getPosition());
}
+ count++;
}
writer.close();
@@ -571,32 +572,24 @@ public class AnalyzingSuggester extends Lookup {
}
@Override
- public boolean store(OutputStream output) throws IOException {
- DataOutput dataOut = new OutputStreamDataOutput(output);
- try {
- if (fst == null) {
- return false;
- }
-
- fst.save(dataOut);
- dataOut.writeVInt(maxAnalyzedPathsForOneInput);
- dataOut.writeByte((byte) (hasPayloads ? 1 : 0));
- } finally {
- IOUtils.close(output);
+ public boolean store(DataOutput output) throws IOException {
+ output.writeVLong(count);
+ if (fst == null) {
+ return false;
}
+
+ fst.save(output);
+ output.writeVInt(maxAnalyzedPathsForOneInput);
+ output.writeByte((byte) (hasPayloads ? 1 : 0));
return true;
}
@Override
- public boolean load(InputStream input) throws IOException {
- DataInput dataIn = new InputStreamDataInput(input);
- try {
- this.fst = new FST>(dataIn, new PairOutputs(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
- maxAnalyzedPathsForOneInput = dataIn.readVInt();
- hasPayloads = dataIn.readByte() == 1;
- } finally {
- IOUtils.close(input);
- }
+ public boolean load(DataInput input) throws IOException {
+ count = input.readVLong();
+ this.fst = new FST>(input, new PairOutputs(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
+ maxAnalyzedPathsForOneInput = input.readVInt();
+ hasPayloads = input.readByte() == 1;
return true;
}
@@ -809,6 +802,11 @@ public class AnalyzingSuggester extends Lookup {
throw new RuntimeException(bogus);
}
}
+
+ @Override
+ public long getCount() {
+ return count;
+ }
/** Returns all prefix paths to initialize the search. */
protected List>> getFullPrefixPaths(List>> prefixPaths,
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
index e901ef7f16e..db332474c9a 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
@@ -22,8 +22,6 @@ package org.apache.lucene.search.suggest.analyzing;
// - add pruning of low-freq ngrams?
import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
//import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
@@ -62,8 +60,6 @@ import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.InputStreamDataInput;
-import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
@@ -153,6 +149,10 @@ public class FreeTextSuggester extends Lookup {
private final int grams;
private final byte separator;
+
+ /** Number of entries the lookup was built with */
+ private long count = 0;
+
/** The default character used to join multiple tokens
* into a single ngram token. The input tokens produced
* by the analyzer must not contain this character. */
@@ -320,6 +320,7 @@ public class FreeTextSuggester extends Lookup {
IndexReader reader = null;
boolean success = false;
+ count = 0;
try {
while (true) {
BytesRef surfaceForm = iterator.next();
@@ -328,6 +329,7 @@ public class FreeTextSuggester extends Lookup {
}
field.setStringValue(surfaceForm.utf8ToString());
writer.addDocument(doc);
+ count++;
}
reader = DirectoryReader.open(writer, false);
@@ -397,31 +399,31 @@ public class FreeTextSuggester extends Lookup {
}
@Override
- public boolean store(OutputStream output) throws IOException {
- DataOutput out = new OutputStreamDataOutput(output);
- CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
- out.writeByte(separator);
- out.writeVInt(grams);
- out.writeVLong(totTokens);
- fst.save(out);
+ public boolean store(DataOutput output) throws IOException {
+ CodecUtil.writeHeader(output, CODEC_NAME, VERSION_CURRENT);
+ output.writeVLong(count);
+ output.writeByte(separator);
+ output.writeVInt(grams);
+ output.writeVLong(totTokens);
+ fst.save(output);
return true;
}
@Override
- public boolean load(InputStream input) throws IOException {
- DataInput in = new InputStreamDataInput(input);
- CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
- byte separatorOrig = in.readByte();
+ public boolean load(DataInput input) throws IOException {
+ CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
+ count = input.readVLong();
+ byte separatorOrig = input.readByte();
if (separatorOrig != separator) {
throw new IllegalStateException("separator=" + separator + " is incorrect: original model was built with separator=" + separatorOrig);
}
- int gramsOrig = in.readVInt();
+ int gramsOrig = input.readVInt();
if (gramsOrig != grams) {
throw new IllegalStateException("grams=" + grams + " is incorrect: original model was built with grams=" + gramsOrig);
}
- totTokens = in.readVLong();
+ totTokens = input.readVLong();
- fst = new FST(in, PositiveIntOutputs.getSingleton());
+ fst = new FST(input, PositiveIntOutputs.getSingleton());
return true;
}
@@ -436,6 +438,11 @@ public class FreeTextSuggester extends Lookup {
}
}
+ @Override
+ public long getCount() {
+ return count;
+ }
+
private int countGrams(BytesRef token) {
int count = 1;
for(int i=0;i= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 4);
}
output.reset(buffer);
- output.writeInt(encodeWeight(tfit.weight()));
+ output.writeInt(encodeWeight(iterator.weight()));
output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition());
}
@@ -207,6 +213,7 @@ public class FSTCompletionLookup extends Lookup {
builder.add(tmp2, bucket);
line++;
+ count++;
}
// The two FSTCompletions share the same automaton.
@@ -264,28 +271,21 @@ public class FSTCompletionLookup extends Lookup {
@Override
- public synchronized boolean store(OutputStream output) throws IOException {
-
- try {
- if (this.normalCompletion == null || normalCompletion.getFST() == null)
- return false;
- normalCompletion.getFST().save(new OutputStreamDataOutput(output));
- } finally {
- IOUtils.close(output);
- }
+ public synchronized boolean store(DataOutput output) throws IOException {
+ output.writeVLong(count);
+ if (this.normalCompletion == null || normalCompletion.getFST() == null)
+ return false;
+ normalCompletion.getFST().save(output);
return true;
}
@Override
- public synchronized boolean load(InputStream input) throws IOException {
- try {
- this.higherWeightsCompletion = new FSTCompletion(new FST