LUCENE-5404: Add .getCount method to all suggesters (Lookup), persist count metadata on .store(), Dictionary returns InputIterator, Dictionary.getWordsIterator renamed to .getEntryIterator

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1565810 13f79535-47bb-0310-9956-ffa450edef68
Areek Zillur 2014-02-07 20:58:19 +00:00
parent f9ea3f8101
commit b5926da2d6
28 changed files with 269 additions and 206 deletions

View File

@@ -77,6 +77,10 @@ New Features
   (missing the term, weight or payload). (Areek Zillur via
   Mike McCandless)
 
+* LUCENE-5404: Add .getCount method to all suggesters (Lookup); persist count
+  metadata on .store(); Dictionary returns InputIterator; Dictionary.getWordsIterator
+  renamed to .getEntryIterator. (Areek Zillur)
+
 * SOLR-1871: The RangeMapFloatFunction accepts an arbitrary ValueSource
   as target and default values. (Chris Harris, shalin)

@@ -386,7 +390,7 @@ New Features
 * LUCENE-5323: Add .sizeInBytes method to all suggesters (Lookup).
   (Areek Zillur via Mike McCandless)
 
 * LUCENE-5312: Add BlockJoinSorter, a new Sorter implementation that makes sure
   to never split up blocks of documents indexed with IndexWriter.addDocuments.
   (Adrien Grand)
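
As a rough illustration of the renamed API (not part of this commit), client code that previously asked a Dictionary for a BytesRefIterator via getWordsIterator() now receives an InputIterator from getEntryIterator() and can read the entry count back from the suggester. The index reader, field name and analyzer below are placeholder assumptions:

  import java.io.IOException;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.search.spell.Dictionary;
  import org.apache.lucene.search.spell.LuceneDictionary;
  import org.apache.lucene.search.suggest.Lookup;
  import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;

  class GetCountExample {
    // Builds a suggester from any Dictionary and reports how many entries went in.
    static long buildAndCount(IndexReader reader, Analyzer analyzer) throws IOException {
      Dictionary dict = new LuceneDictionary(reader, "title");  // "title" is an illustrative field name
      Lookup suggester = new AnalyzingSuggester(analyzer);
      suggester.build(dict.getEntryIterator());                 // was: dict.getWordsIterator()
      return suggester.getCount();                              // new in LUCENE-5404
    }
  }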

View File

@@ -17,19 +17,20 @@ package org.apache.lucene.search.spell;
  */
 import java.io.IOException;
 
-import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.search.suggest.InputIterator;
 
 /**
  * A simple interface representing a Dictionary. A Dictionary
- * here is just a list of words.
+ * here is a list of entries, where every entry consists of
+ * term, weight and payload.
  *
  *
  */
 public interface Dictionary {
 
   /**
-   * Return all words present in the dictionary
+   * Returns an iterator over all the entries
    * @return Iterator
    */
-  BytesRefIterator getWordsIterator() throws IOException;
+  InputIterator getEntryIterator() throws IOException;
 }

View File

@@ -56,7 +56,7 @@ public class HighFrequencyDictionary implements Dictionary {
   }
 
   @Override
-  public final BytesRefIterator getWordsIterator() throws IOException {
+  public final InputIterator getEntryIterator() throws IOException {
     return new HighFrequencyIterator();
   }

View File

@@ -18,6 +18,7 @@ package org.apache.lucene.search.spell;
  */
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
@@ -42,12 +43,12 @@ public class LuceneDictionary implements Dictionary {
   }
 
   @Override
-  public final BytesRefIterator getWordsIterator() throws IOException {
+  public final InputIterator getEntryIterator() throws IOException {
     final Terms terms = MultiFields.getTerms(reader, field);
     if (terms != null) {
-      return terms.iterator(null);
+      return new InputIterator.InputIteratorWrapper(terms.iterator(null));
     } else {
-      return BytesRefIterator.EMPTY;
+      return InputIterator.EMPTY;
     }
   }
 }

View File

@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 
+import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.IOUtils;
@@ -66,8 +67,8 @@ public class PlainTextDictionary implements Dictionary {
   }
 
   @Override
-  public BytesRefIterator getWordsIterator() throws IOException {
-    return new FileIterator();
+  public InputIterator getEntryIterator() throws IOException {
+    return new InputIterator.InputIteratorWrapper(new FileIterator());
   }
 
   final class FileIterator implements BytesRefIterator {

View File

@@ -512,7 +512,7 @@ public class SpellChecker implements java.io.Closeable {
       boolean isEmpty = termsEnums.isEmpty();
 
       try {
-        BytesRefIterator iter = dict.getWordsIterator();
+        BytesRefIterator iter = dict.getEntryIterator();
         BytesRef currentTerm;
 
         terms: while ((currentTerm = iter.next()) != null) {

View File

@@ -86,7 +86,7 @@ public class DocumentDictionary implements Dictionary {
   }
 
   @Override
-  public BytesRefIterator getWordsIterator() throws IOException {
+  public InputIterator getEntryIterator() throws IOException {
     return new DocumentInputIterator(payloadField!=null);
   }
 
@@ -102,7 +102,6 @@ public class DocumentDictionary implements Dictionary {
     private BytesRef currentPayload = null;
     private final NumericDocValues weightValues;
 
     /**
      * Creates an iterator over term, weight and payload fields from the lucene
      * index. setting <code>withPayload</code> to false, implies an iterator
@@ -150,7 +149,7 @@ public class DocumentDictionary implements Dictionary {
         currentPayload = tempPayload;
         currentWeight = getWeight(doc, currentDocId);
         return tempTerm;
       }
       return null;
@@ -165,7 +164,7 @@ public class DocumentDictionary implements Dictionary {
     public boolean hasPayloads() {
       return hasPayloads;
     }
 
     /**
      * Returns the value of the <code>weightField</code> for the current document.
      * Retrieves the value for the <code>weightField</code> if its stored (using <code>doc</code>)

View File

@@ -92,7 +92,7 @@ public class DocumentValueSourceDictionary extends DocumentDictionary {
   }
 
   @Override
-  public BytesRefIterator getWordsIterator() throws IOException {
+  public InputIterator getEntryIterator() throws IOException {
     return new DocumentValueSourceInputIterator(payloadField!=null);
   }

View File

@@ -106,7 +106,7 @@ public class FileDictionary implements Dictionary {
   }
 
   @Override
-  public InputIterator getWordsIterator() {
+  public InputIterator getEntryIterator() {
     try {
       return new FileIterator();
     } catch (IOException e) {

View File

@@ -44,6 +44,9 @@ public interface InputIterator extends BytesRefIterator {
   /** Returns true if the iterator has payloads */
   public boolean hasPayloads();
 
+  /** Singleton InputIterator that iterates over 0 BytesRefs. */
+  public static final InputIterator EMPTY = new InputIteratorWrapper(BytesRefIterator.EMPTY);
+
   /**
    * Wraps a BytesRefIterator as a suggester InputIterator, with all weights
    * set to <code>1</code> and carries no payload
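
For context, a minimal sketch (not from this patch) of how the wrapper and the new EMPTY singleton combine, mirroring the LuceneDictionary change above: every wrapped entry carries weight 1 and no payload, and a missing field falls back to the empty iterator.

  import java.io.IOException;
  import org.apache.lucene.index.Terms;
  import org.apache.lucene.search.suggest.InputIterator;

  class EntryIteratorSketch {
    // Wraps a field's terms as suggester entries; weight defaults to 1, no payloads.
    static InputIterator entriesFor(Terms terms) throws IOException {
      if (terms == null) {
        return InputIterator.EMPTY;                // new singleton added above
      }
      return new InputIterator.InputIteratorWrapper(terms.iterator(null));
    }
  }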

View File

@@ -24,8 +24,12 @@ import java.util.Comparator;
 import java.util.List;
 
 import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.PriorityQueue;
 
 /**
@@ -33,6 +37,7 @@ import org.apache.lucene.util.PriorityQueue;
  * @lucene.experimental
  */
 public abstract class Lookup {
 
   /**
    * Result of a lookup.
    */
@@ -157,21 +162,46 @@ public abstract class Lookup {
    * {@link UnsortedInputIterator} in such case.
    */
   public void build(Dictionary dict) throws IOException {
-    BytesRefIterator it = dict.getWordsIterator();
-    InputIterator tfit;
-    if (it instanceof InputIterator) {
-      tfit = (InputIterator)it;
-    } else {
-      tfit = new InputIterator.InputIteratorWrapper(it);
-    }
-    build(tfit);
+    build(dict.getEntryIterator());
   }
 
+  /**
+   * Calls {@link #load(DataInput)} after converting
+   * {@link InputStream} to {@link DataInput}
+   */
+  public boolean load(InputStream input) throws IOException {
+    DataInput dataIn = new InputStreamDataInput(input);
+    try {
+      return load(dataIn);
+    } finally {
+      IOUtils.close(input);
+    }
+  }
+
+  /**
+   * Calls {@link #store(DataOutput)} after converting
+   * {@link OutputStream} to {@link DataOutput}
+   */
+  public boolean store(OutputStream output) throws IOException {
+    DataOutput dataOut = new OutputStreamDataOutput(output);
+    try {
+      return store(dataOut);
+    } finally {
+      IOUtils.close(output);
+    }
+  }
+
+  /**
+   * Get the number of entries the lookup was built with
+   * @return total number of suggester entries
+   */
+  public abstract long getCount();
+
   /**
    * Builds up a new internal {@link Lookup} representation based on the given {@link InputIterator}.
    * The implementation might re-sort the data internally.
    */
-  public abstract void build(InputIterator tfit) throws IOException;
+  public abstract void build(InputIterator inputIterator) throws IOException;
 
   /**
    * Look up a key and return possible completion for this key.
@@ -183,23 +213,22 @@ public abstract class Lookup {
    */
   public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num);
 
   /**
    * Persist the constructed lookup data to a directory. Optional operation.
-   * @param output {@link OutputStream} to write the data to.
+   * @param output {@link DataOutput} to write the data to.
    * @return true if successful, false if unsuccessful or not supported.
    * @throws IOException when fatal IO error occurs.
    */
-  public abstract boolean store(OutputStream output) throws IOException;
+  public abstract boolean store(DataOutput output) throws IOException;
 
   /**
    * Discard current lookup data and load it from a previously saved copy.
    * Optional operation.
-   * @param input the {@link InputStream} to load the lookup data.
+   * @param input the {@link DataInput} to load the lookup data.
    * @return true if completed successfully, false if unsuccessful or not supported.
    * @throws IOException when fatal IO error occurs.
    */
-  public abstract boolean load(InputStream input) throws IOException;
+  public abstract boolean load(DataInput input) throws IOException;
 
   /**
    * Get the size of the underlying lookup implementation in memory
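
A hedged usage sketch (assumed client code, not part of the patch): the new stream overloads wrap a java.io OutputStream/InputStream as DataOutput/DataInput and close them, so persisting and reloading a built suggester reduces to the following. The file path is illustrative, and getCount() should survive the round-trip for suggesters that actually support store():

  import java.io.FileInputStream;
  import java.io.FileOutputStream;
  import java.io.IOException;
  import org.apache.lucene.search.suggest.Lookup;

  class LookupPersistenceSketch {
    // Stores 'built' to a file and loads it into 'empty'; both streams are closed by Lookup.
    static boolean saveAndReload(Lookup built, Lookup empty, String path) throws IOException {
      if (!built.store(new FileOutputStream(path))) {
        return false;                                        // suggester does not support persistence
      }
      boolean loaded = empty.load(new FileInputStream(path));
      assert !loaded || empty.getCount() == built.getCount(); // count metadata persisted on store()
      return loaded;
    }
  }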

View File

@@ -20,8 +20,6 @@ package org.apache.lucene.search.suggest.analyzing;
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -68,6 +66,8 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.BytesRef;
@@ -107,6 +107,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   private final File indexPath;
   final int minPrefixChars;
   private Directory dir;
+  /** Number of entries the lookup was built with */
+  private long count = 0;
 
   /** {@link IndexSearcher} used for lookups. */
   protected IndexSearcher searcher;
@@ -154,12 +156,14 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
     if (DirectoryReader.indexExists(dir)) {
       // Already built; open it:
-      searcher = new IndexSearcher(DirectoryReader.open(dir));
+      IndexReader reader = DirectoryReader.open(dir);
+      searcher = new IndexSearcher(reader);
       // This will just be null if app didn't pass payloads to build():
       // TODO: maybe just stored fields? they compress...
       payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");
       weightsDV = MultiDocValues.getNumericValues(searcher.getIndexReader(), "weight");
       textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME);
+      count = reader.numDocs();
       assert textDV != null;
     }
   }
@@ -194,6 +198,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
     IndexWriter w2 = null;
     AtomicReader r = null;
     boolean success = false;
+    count = 0;
     try {
       Analyzer gramAnalyzer = new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
           @Override
@@ -239,7 +244,6 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
       } else {
         payloadField = null;
       }
 
       //long t0 = System.nanoTime();
       while ((text = iter.next()) != null) {
         String textString = text.utf8ToString();
@@ -251,6 +255,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
           payloadField.setBytesValue(iter.payload());
         }
         w.addDocument(doc);
+        count++;
       }
 
       //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
@@ -612,12 +617,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   }
 
   @Override
-  public boolean store(OutputStream out) {
+  public boolean store(DataOutput in) throws IOException {
     return false;
   }
 
   @Override
-  public boolean load(InputStream out) {
+  public boolean load(DataInput out) throws IOException {
     return false;
   }
 
@@ -637,4 +642,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   public long sizeInBytes() {
     return RamUsageEstimator.sizeOf(this);
   }
+
+  @Override
+  public long getCount() {
+    return count;
+  }
 };

View File

@@ -19,8 +19,6 @@ package org.apache.lucene.search.suggest.analyzing;
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -38,8 +36,6 @@ import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.InputStreamDataInput;
-import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
@@ -187,6 +183,9 @@ public class AnalyzingSuggester extends Lookup {
   /** Whether position holes should appear in the automaton. */
   private boolean preservePositionIncrements;
 
+  /** Number of entries the lookup was built with */
+  private long count = 0;
+
   /**
    * Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)
    * AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
@@ -394,6 +393,7 @@ public class AnalyzingSuggester extends Lookup {
     TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
 
     boolean success = false;
+    count = 0;
     byte buffer[] = new byte[8];
     try {
       ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
@@ -458,6 +458,7 @@ public class AnalyzingSuggester extends Lookup {
           writer.write(buffer, 0, output.getPosition());
         }
+        count++;
       }
       writer.close();
@@ -571,32 +572,24 @@ public class AnalyzingSuggester extends Lookup {
   }
 
   @Override
-  public boolean store(OutputStream output) throws IOException {
-    DataOutput dataOut = new OutputStreamDataOutput(output);
-    try {
-      if (fst == null) {
-        return false;
-      }
-      fst.save(dataOut);
-      dataOut.writeVInt(maxAnalyzedPathsForOneInput);
-      dataOut.writeByte((byte) (hasPayloads ? 1 : 0));
-    } finally {
-      IOUtils.close(output);
+  public boolean store(DataOutput output) throws IOException {
+    output.writeVLong(count);
+    if (fst == null) {
+      return false;
     }
+    fst.save(output);
+    output.writeVInt(maxAnalyzedPathsForOneInput);
+    output.writeByte((byte) (hasPayloads ? 1 : 0));
     return true;
   }
 
   @Override
-  public boolean load(InputStream input) throws IOException {
-    DataInput dataIn = new InputStreamDataInput(input);
-    try {
-      this.fst = new FST<Pair<Long,BytesRef>>(dataIn, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
-      maxAnalyzedPathsForOneInput = dataIn.readVInt();
-      hasPayloads = dataIn.readByte() == 1;
-    } finally {
-      IOUtils.close(input);
-    }
+  public boolean load(DataInput input) throws IOException {
+    count = input.readVLong();
+    this.fst = new FST<Pair<Long,BytesRef>>(input, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
+    maxAnalyzedPathsForOneInput = input.readVInt();
+    hasPayloads = input.readByte() == 1;
     return true;
   }
 
@@ -809,6 +802,11 @@ public class AnalyzingSuggester extends Lookup {
       throw new RuntimeException(bogus);
     }
   }
+
+  @Override
+  public long getCount() {
+    return count;
+  }
 
   /** Returns all prefix paths to initialize the search. */
   protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,

View File

@@ -22,8 +22,6 @@ package org.apache.lucene.search.suggest.analyzing;
 //   - add pruning of low-freq ngrams?
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
 //import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -62,8 +60,6 @@ import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.InputStreamDataInput;
-import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
@@ -153,6 +149,10 @@ public class FreeTextSuggester extends Lookup {
   private final int grams;
 
   private final byte separator;
 
+  /** Number of entries the lookup was built with */
+  private long count = 0;
+
   /** The default character used to join multiple tokens
    *  into a single ngram token. The input tokens produced
    *  by the analyzer must not contain this character. */
@@ -320,6 +320,7 @@ public class FreeTextSuggester extends Lookup {
     IndexReader reader = null;
     boolean success = false;
+    count = 0;
     try {
       while (true) {
         BytesRef surfaceForm = iterator.next();
@@ -328,6 +329,7 @@ public class FreeTextSuggester extends Lookup {
         }
         field.setStringValue(surfaceForm.utf8ToString());
         writer.addDocument(doc);
+        count++;
       }
       reader = DirectoryReader.open(writer, false);
@@ -397,31 +399,31 @@ public class FreeTextSuggester extends Lookup {
   }
 
   @Override
-  public boolean store(OutputStream output) throws IOException {
-    DataOutput out = new OutputStreamDataOutput(output);
-    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
-    out.writeByte(separator);
-    out.writeVInt(grams);
-    out.writeVLong(totTokens);
-    fst.save(out);
+  public boolean store(DataOutput output) throws IOException {
+    CodecUtil.writeHeader(output, CODEC_NAME, VERSION_CURRENT);
+    output.writeVLong(count);
+    output.writeByte(separator);
+    output.writeVInt(grams);
+    output.writeVLong(totTokens);
+    fst.save(output);
     return true;
   }
 
   @Override
-  public boolean load(InputStream input) throws IOException {
-    DataInput in = new InputStreamDataInput(input);
-    CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
-    byte separatorOrig = in.readByte();
+  public boolean load(DataInput input) throws IOException {
+    CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
+    count = input.readVLong();
+    byte separatorOrig = input.readByte();
     if (separatorOrig != separator) {
       throw new IllegalStateException("separator=" + separator + " is incorrect: original model was built with separator=" + separatorOrig);
     }
-    int gramsOrig = in.readVInt();
+    int gramsOrig = input.readVInt();
     if (gramsOrig != grams) {
       throw new IllegalStateException("grams=" + grams + " is incorrect: original model was built with grams=" + gramsOrig);
    }
-    totTokens = in.readVLong();
-    fst = new FST<Long>(in, PositiveIntOutputs.getSingleton());
+    totTokens = input.readVLong();
+    fst = new FST<Long>(input, PositiveIntOutputs.getSingleton());
     return true;
   }
 
@@ -436,6 +438,11 @@ public class FreeTextSuggester extends Lookup {
     }
   }
 
+  @Override
+  public long getCount() {
+    return count;
+  }
+
   private int countGrams(BytesRef token) {
     int count = 1;
     for(int i=0;i<token.length;i++) {

View File

@@ -32,6 +32,8 @@ import org.apache.lucene.search.suggest.fst.FSTCompletion.Completion;
 import org.apache.lucene.search.suggest.tst.TSTLookup;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.*;
@@ -93,6 +95,9 @@ public class FSTCompletionLookup extends Lookup {
    */
   private FSTCompletion normalCompletion;
 
+  /** Number of entries the lookup was built with */
+  private long count = 0;
+
   /**
    * This constructor prepares for creating a suggested FST using the
    * {@link #build(InputIterator)} method. The number of weight
@@ -140,8 +145,8 @@ public class FSTCompletionLookup extends Lookup {
   }
 
   @Override
-  public void build(InputIterator tfit) throws IOException {
-    if (tfit.hasPayloads()) {
+  public void build(InputIterator iterator) throws IOException {
+    if (iterator.hasPayloads()) {
       throw new IllegalArgumentException("this suggester doesn't support payloads");
     }
     File tempInput = File.createTempFile(
@@ -156,17 +161,18 @@ public class FSTCompletionLookup extends Lookup {
     // Push floats up front before sequences to sort them. For now, assume they are non-negative.
     // If negative floats are allowed some trickery needs to be done to find their byte order.
     boolean success = false;
+    count = 0;
     try {
       byte [] buffer = new byte [0];
       ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
       BytesRef spare;
-      while ((spare = tfit.next()) != null) {
+      while ((spare = iterator.next()) != null) {
         if (spare.length + 4 >= buffer.length) {
           buffer = ArrayUtil.grow(buffer, spare.length + 4);
         }
         output.reset(buffer);
-        output.writeInt(encodeWeight(tfit.weight()));
+        output.writeInt(encodeWeight(iterator.weight()));
         output.writeBytes(spare.bytes, spare.offset, spare.length);
         writer.write(buffer, 0, output.getPosition());
       }
@@ -207,6 +213,7 @@ public class FSTCompletionLookup extends Lookup {
         builder.add(tmp2, bucket);
 
         line++;
+        count++;
       }
 
       // The two FSTCompletions share the same automaton.
@@ -264,28 +271,21 @@ public class FSTCompletionLookup extends Lookup {
 
   @Override
-  public synchronized boolean store(OutputStream output) throws IOException {
-    try {
-      if (this.normalCompletion == null || normalCompletion.getFST() == null)
-        return false;
-      normalCompletion.getFST().save(new OutputStreamDataOutput(output));
-    } finally {
-      IOUtils.close(output);
-    }
+  public synchronized boolean store(DataOutput output) throws IOException {
+    output.writeVLong(count);
+    if (this.normalCompletion == null || normalCompletion.getFST() == null)
+      return false;
+    normalCompletion.getFST().save(output);
     return true;
   }
 
   @Override
-  public synchronized boolean load(InputStream input) throws IOException {
-    try {
-      this.higherWeightsCompletion = new FSTCompletion(new FST<Object>(
-          new InputStreamDataInput(input), NoOutputs.getSingleton()));
-      this.normalCompletion = new FSTCompletion(
-          higherWeightsCompletion.getFST(), false, exactMatchFirst);
-    } finally {
-      IOUtils.close(input);
-    }
+  public synchronized boolean load(DataInput input) throws IOException {
+    count = input.readVLong();
+    this.higherWeightsCompletion = new FSTCompletion(new FST<Object>(
+        input, NoOutputs.getSingleton()));
+    this.normalCompletion = new FSTCompletion(
+        higherWeightsCompletion.getFST(), false, exactMatchFirst);
     return true;
   }
 
@@ -293,4 +293,9 @@ public class FSTCompletionLookup extends Lookup {
   public long sizeInBytes() {
     return RamUsageEstimator.sizeOf(this);
   }
+
+  @Override
+  public long getCount() {
+    return count;
+  }
 }

View File

@@ -18,8 +18,6 @@ package org.apache.lucene.search.suggest.fst;
  */
 
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -31,12 +29,11 @@ import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
 import org.apache.lucene.search.suggest.SortedInputIterator;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
-import org.apache.lucene.store.InputStreamDataInput;
-import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.fst.Builder;
@@ -72,6 +69,9 @@ public class WFSTCompletionLookup extends Lookup {
    */
   private final boolean exactFirst;
 
+  /** Number of entries the lookup was built with */
+  private long count = 0;
+
   /**
    * Calls {@link #WFSTCompletionLookup(boolean) WFSTCompletionLookup(true)}
    */
@@ -96,6 +96,7 @@ public class WFSTCompletionLookup extends Lookup {
     if (iterator.hasPayloads()) {
       throw new IllegalArgumentException("this suggester doesn't support payloads");
     }
+    count = 0;
     BytesRef scratch = new BytesRef();
     InputIterator iter = new WFSTInputIterator(iterator);
     IntsRef scratchInts = new IntsRef();
@@ -114,31 +115,26 @@ public class WFSTCompletionLookup extends Lookup {
       Util.toIntsRef(scratch, scratchInts);
       builder.add(scratchInts, cost);
       previous.copyBytes(scratch);
+      count++;
     }
     fst = builder.finish();
   }
 
   @Override
-  public boolean store(OutputStream output) throws IOException {
-    try {
-      if (fst == null) {
-        return false;
-      }
-      fst.save(new OutputStreamDataOutput(output));
-    } finally {
-      IOUtils.close(output);
+  public boolean store(DataOutput output) throws IOException {
+    output.writeVLong(count);
+    if (fst == null) {
+      return false;
     }
+    fst.save(output);
     return true;
   }
 
   @Override
-  public boolean load(InputStream input) throws IOException {
-    try {
-      this.fst = new FST<Long>(new InputStreamDataInput(input), PositiveIntOutputs.getSingleton());
-    } finally {
-      IOUtils.close(input);
-    }
+  public boolean load(DataInput input) throws IOException {
+    count = input.readVLong();
+    this.fst = new FST<Long>(input, PositiveIntOutputs.getSingleton());
     return true;
   }
 
@@ -293,4 +289,9 @@ public class WFSTCompletionLookup extends Lookup {
   public long sizeInBytes() {
     return (fst == null) ? 0 : fst.sizeInBytes();
   }
+
+  @Override
+  public long getCount() {
+    return count;
+  }
 }

View File

@@ -17,20 +17,17 @@ package org.apache.lucene.search.suggest.jaspell;
  * limitations under the License.
  */
 
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.UnicodeUtil;
@@ -44,6 +41,9 @@ public class JaspellLookup extends Lookup {
   JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
   private boolean usePrefix = true;
   private int editDistance = 2;
 
+  /** Number of entries the lookup was built with */
+  private long count = 0;
+
   /**
    * Creates a new empty trie
@@ -52,23 +52,25 @@ public class JaspellLookup extends Lookup {
   public JaspellLookup() {}
 
   @Override
-  public void build(InputIterator tfit) throws IOException {
-    if (tfit.hasPayloads()) {
+  public void build(InputIterator iterator) throws IOException {
+    if (iterator.hasPayloads()) {
       throw new IllegalArgumentException("this suggester doesn't support payloads");
     }
+    count = 0;
     trie = new JaspellTernarySearchTrie();
     trie.setMatchAlmostDiff(editDistance);
     BytesRef spare;
     final CharsRef charsSpare = new CharsRef();
 
-    while ((spare = tfit.next()) != null) {
-      final long weight = tfit.weight();
+    while ((spare = iterator.next()) != null) {
+      final long weight = iterator.weight();
       if (spare.length == 0) {
         continue;
       }
       charsSpare.grow(spare.length);
       UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
       trie.put(charsSpare.toString(), Long.valueOf(weight));
+      count++;
     }
   }
 
@@ -131,8 +133,8 @@ public class JaspellLookup extends Lookup {
   private static final byte HI_KID = 0x04;
   private static final byte HAS_VALUE = 0x08;
 
-  private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
-    node.splitchar = in.readChar();
+  private void readRecursively(DataInput in, TSTNode node) throws IOException {
+    node.splitchar = in.readString().charAt(0);
     byte mask = in.readByte();
     if ((mask & HAS_VALUE) != 0) {
       node.data = Long.valueOf(in.readLong());
@@ -154,11 +156,11 @@ public class JaspellLookup extends Lookup {
     }
   }
 
-  private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
+  private void writeRecursively(DataOutput out, TSTNode node) throws IOException {
     if (node == null) {
       return;
     }
-    out.writeChar(node.splitchar);
+    out.writeString(new String(new char[] {node.splitchar}, 0, 1));
     byte mask = 0;
     if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID;
     if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID;
@@ -174,31 +176,22 @@ public class JaspellLookup extends Lookup {
   }
 
   @Override
-  public boolean store(OutputStream output) throws IOException {
+  public boolean store(DataOutput output) throws IOException {
+    output.writeVLong(count);
     TSTNode root = trie.getRoot();
     if (root == null) { // empty tree
       return false;
     }
-    DataOutputStream out = new DataOutputStream(output);
-    try {
-      writeRecursively(out, root);
-      out.flush();
-    } finally {
-      IOUtils.close(out);
-    }
+    writeRecursively(output, root);
     return true;
   }
 
   @Override
-  public boolean load(InputStream input) throws IOException {
-    DataInputStream in = new DataInputStream(input);
+  public boolean load(DataInput input) throws IOException {
+    count = input.readVLong();
     TSTNode root = trie.new TSTNode('\0', null);
-    try {
-      readRecursively(in, root);
-      trie.setRoot(root);
-    } finally {
-      IOUtils.close(in);
-    }
+    readRecursively(input, root);
+    trie.setRoot(root);
     return true;
   }
 
@@ -207,4 +200,9 @@ public class JaspellLookup extends Lookup {
   public long sizeInBytes() {
     return RamUsageEstimator.sizeOf(trie);
   }
+
+  @Override
+  public long getCount() {
+    return count;
+  }
 }

View File

@@ -17,20 +17,17 @@ package org.apache.lucene.search.suggest.tst;
  * limitations under the License.
 */
 
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.SortedInputIterator;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.UnicodeUtil;
@@ -43,6 +40,9 @@ import org.apache.lucene.util.UnicodeUtil;
 public class TSTLookup extends Lookup {
   TernaryTreeNode root = new TernaryTreeNode();
   TSTAutocomplete autocomplete = new TSTAutocomplete();
 
+  /** Number of entries the lookup was built with */
+  private long count = 0;
+
   /**
    * Creates a new TSTLookup with an empty Ternary Search Tree.
@@ -51,24 +51,25 @@ public class TSTLookup extends Lookup {
   public TSTLookup() {}
 
   @Override
-  public void build(InputIterator tfit) throws IOException {
-    if (tfit.hasPayloads()) {
+  public void build(InputIterator iterator) throws IOException {
+    if (iterator.hasPayloads()) {
       throw new IllegalArgumentException("this suggester doesn't support payloads");
     }
     root = new TernaryTreeNode();
 
     // make sure it's sorted and the comparator uses UTF16 sort order
-    tfit = new SortedInputIterator(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
+    iterator = new SortedInputIterator(iterator, BytesRef.getUTF8SortedAsUTF16Comparator());
+    count = 0;
     ArrayList<String> tokens = new ArrayList<String>();
     ArrayList<Number> vals = new ArrayList<Number>();
     BytesRef spare;
     CharsRef charsSpare = new CharsRef();
-    while ((spare = tfit.next()) != null) {
+    while ((spare = iterator.next()) != null) {
       charsSpare.grow(spare.length);
       UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
       tokens.add(charsSpare.toString());
-      vals.add(Long.valueOf(tfit.weight()));
+      vals.add(Long.valueOf(iterator.weight()));
+      count++;
     }
     autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
   }
@@ -148,11 +149,11 @@ public class TSTLookup extends Lookup {
   private static final byte HAS_VALUE = 0x10;
 
   // pre-order traversal
-  private void readRecursively(DataInputStream in, TernaryTreeNode node) throws IOException {
-    node.splitchar = in.readChar();
+  private void readRecursively(DataInput in, TernaryTreeNode node) throws IOException {
+    node.splitchar = in.readString().charAt(0);
     byte mask = in.readByte();
     if ((mask & HAS_TOKEN) != 0) {
-      node.token = in.readUTF();
+      node.token = in.readString();
     }
     if ((mask & HAS_VALUE) != 0) {
       node.val = Long.valueOf(in.readLong());
@@ -172,9 +173,9 @@ public class TSTLookup extends Lookup {
   }
 
   // pre-order traversal
-  private void writeRecursively(DataOutputStream out, TernaryTreeNode node) throws IOException {
+  private void writeRecursively(DataOutput out, TernaryTreeNode node) throws IOException {
     // write out the current node
-    out.writeChar(node.splitchar);
+    out.writeString(new String(new char[] {node.splitchar}, 0, 1));
     // prepare a mask of kids
     byte mask = 0;
     if (node.eqKid != null) mask |= EQ_KID;
@@ -183,7 +184,7 @@ public class TSTLookup extends Lookup {
     if (node.token != null) mask |= HAS_TOKEN;
     if (node.val != null) mask |= HAS_VALUE;
     out.writeByte(mask);
-    if (node.token != null) out.writeUTF(node.token);
+    if (node.token != null) out.writeString(node.token);
     if (node.val != null) out.writeLong(((Number)node.val).longValue());
     // recurse and write kids
     if (node.loKid != null) {
@@ -198,26 +199,17 @@ public class TSTLookup extends Lookup {
   }
 
   @Override
-  public synchronized boolean store(OutputStream output) throws IOException {
-    DataOutputStream out = new DataOutputStream(output);
-    try {
-      writeRecursively(out, root);
-      out.flush();
-    } finally {
-      IOUtils.close(output);
-    }
+  public synchronized boolean store(DataOutput output) throws IOException {
+    output.writeVLong(count);
+    writeRecursively(output, root);
     return true;
   }
 
   @Override
-  public synchronized boolean load(InputStream input) throws IOException {
-    DataInputStream in = new DataInputStream(input);
+  public synchronized boolean load(DataInput input) throws IOException {
+    count = input.readVLong();
     root = new TernaryTreeNode();
-    try {
-      readRecursively(in, root);
-    } finally {
-      IOUtils.close(in);
-    }
+    readRecursively(input, root);
     return true;
   }
 
@@ -227,4 +219,8 @@ public class TSTLookup extends Lookup {
     return RamUsageEstimator.sizeOf(autocomplete);
   }
+
+  @Override
+  public long getCount() {
+    return count;
+  }
 }

View File

@@ -90,7 +90,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
       indexReader = DirectoryReader.open(store);
       ld = new LuceneDictionary(indexReader, "nonexistent_field");
-      it = ld.getWordsIterator();
+      it = ld.getEntryIterator();
       assertNull("More elements than expected", spare = it.next());
     } finally {
@@ -103,7 +103,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
       indexReader = DirectoryReader.open(store);
       ld = new LuceneDictionary(indexReader, "aaa");
-      it = ld.getWordsIterator();
+      it = ld.getEntryIterator();
       assertNotNull("First element doesn't exist.", spare = it.next());
       assertTrue("First element isn't correct", spare.utf8ToString().equals("foo"));
       assertNull("More elements than expected", it.next());
@@ -117,7 +117,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
       indexReader = DirectoryReader.open(store);
       ld = new LuceneDictionary(indexReader, "contents");
-      it = ld.getWordsIterator();
+      it = ld.getEntryIterator();
       assertNotNull("First element doesn't exist.", spare = it.next());
       assertTrue("First element isn't correct", spare.utf8ToString().equals("Jerry"));
@@ -126,7 +126,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
       assertNull("More elements than expected", it.next());
 
       ld = new LuceneDictionary(indexReader, "contents");
-      it = ld.getWordsIterator();
+      it = ld.getEntryIterator();
       int counter = 2;
       while (it.next() != null) {
@@ -145,7 +145,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
       indexReader = DirectoryReader.open(store);
       ld = new LuceneDictionary(indexReader, "contents");
-      it = ld.getWordsIterator();
+      it = ld.getEntryIterator();
 
       // just iterate through words
       assertEquals("First element isn't correct", "Jerry", it.next().utf8ToString());
@@ -162,7 +162,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
       indexReader = DirectoryReader.open(store);
       ld = new LuceneDictionary(indexReader, "zzz");
-      it = ld.getWordsIterator();
+      it = ld.getEntryIterator();
       assertNotNull("First element doesn't exist.", spare = it.next());
       assertEquals("First element isn't correct", "bar", spare.utf8ToString());

View File

@@ -109,7 +109,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = dictionary.getEntryIterator();
 
     assertNull(inputIterator.next());
     assertEquals(inputIterator.weight(), 0);
@@ -135,7 +135,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = dictionary.getEntryIterator();
     BytesRef f;
     while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
@@ -170,7 +170,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = dictionary.getEntryIterator();
     BytesRef f;
     while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
@@ -228,7 +228,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
     IndexReader ir = DirectoryReader.open(dir);
     assertEquals(ir.numDocs(), docs.size());
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = dictionary.getEntryIterator();
     BytesRef f;
     while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());

View File

@@ -83,7 +83,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator();
 
     assertNull(inputIterator.next());
     assertEquals(inputIterator.weight(), 0);
@@ -109,7 +109,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
     IndexReader ir = DirectoryReader.open(dir);
     ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
     Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator();
     BytesRef f;
     while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
@@ -141,7 +141,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
     IndexReader ir = DirectoryReader.open(dir);
     ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
     Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd));
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator();
     BytesRef f;
     while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
@@ -195,7 +195,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
     ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2)};
     Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator();
     BytesRef f;
     while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
@@ -226,7 +226,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
-    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator();
     BytesRef f;
     while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());


@@ -76,7 +76,7 @@ public class FileDictionaryTest extends LuceneTestCase {
     InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8"));
     FileDictionary dictionary = new FileDictionary(inputReader);
     List<List<String>> entries = fileInput.getKey();
-    InputIterator inputIter = dictionary.getWordsIterator();
+    InputIterator inputIter = dictionary.getEntryIterator();
     assertFalse(inputIter.hasPayloads());
     BytesRef term;
     int count = 0;
@@ -98,7 +98,7 @@ public class FileDictionaryTest extends LuceneTestCase {
     InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8"));
     FileDictionary dictionary = new FileDictionary(inputReader);
     List<List<String>> entries = fileInput.getKey();
-    InputIterator inputIter = dictionary.getWordsIterator();
+    InputIterator inputIter = dictionary.getEntryIterator();
     assertFalse(inputIter.hasPayloads());
     BytesRef term;
     int count = 0;
@@ -120,7 +120,7 @@ public class FileDictionaryTest extends LuceneTestCase {
     InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8"));
     FileDictionary dictionary = new FileDictionary(inputReader);
     List<List<String>> entries = fileInput.getKey();
-    InputIterator inputIter = dictionary.getWordsIterator();
+    InputIterator inputIter = dictionary.getEntryIterator();
     assertTrue(inputIter.hasPayloads());
     BytesRef term;
     int count = 0;
@@ -146,7 +146,7 @@ public class FileDictionaryTest extends LuceneTestCase {
     InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8"));
     FileDictionary dictionary = new FileDictionary(inputReader);
     List<List<String>> entries = fileInput.getKey();
-    InputIterator inputIter = dictionary.getWordsIterator();
+    InputIterator inputIter = dictionary.getEntryIterator();
     assertTrue(inputIter.hasPayloads());
     BytesRef term;
     int count = 0;
@@ -173,7 +173,7 @@ public class FileDictionaryTest extends LuceneTestCase {
     InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8"));
     FileDictionary dictionary = new FileDictionary(inputReader, " , ");
     List<List<String>> entries = fileInput.getKey();
-    InputIterator inputIter = dictionary.getWordsIterator();
+    InputIterator inputIter = dictionary.getEntryIterator();
     assertTrue(inputIter.hasPayloads());
     BytesRef term;
     int count = 0;


@@ -35,7 +35,7 @@ public class TestHighFrequencyDictionary extends LuceneTestCase {
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
-    BytesRefIterator tf = dictionary.getWordsIterator();
+    BytesRefIterator tf = dictionary.getEntryIterator();
     assertNull(tf.next());
     dir.close();
   }
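The test hunks above all track the rename of Dictionary.getWordsIterator() to getEntryIterator(), which hands back an InputIterator carrying a per-entry weight and optional payload instead of a bare BytesRefIterator. Below is a minimal consumption sketch, not taken from this commit: it assumes an already-built index in `dir` and uses LuceneDictionary purely for illustration; any Dictionary implementation exposes the same iterator.

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class EntryIteratorSketch {
  // Walks every entry of a Dictionary through the renamed getEntryIterator(),
  // reading the term plus its weight and (when present) its payload.
  static void dumpEntries(Directory dir, String field) throws IOException {
    IndexReader reader = DirectoryReader.open(dir);
    try {
      Dictionary dictionary = new LuceneDictionary(reader, field); // illustrative choice of Dictionary
      InputIterator entries = dictionary.getEntryIterator();       // was getWordsIterator()
      boolean hasPayloads = entries.hasPayloads();
      BytesRef term;
      while ((term = entries.next()) != null) {
        long weight = entries.weight();
        BytesRef payload = hasPayloads ? entries.payload() : null;
        System.out.println(term.utf8ToString() + " weight=" + weight
            + (payload == null ? "" : " payload=" + payload.utf8ToString()));
      }
    } finally {
      reader.close();
    }
  }
}

The document- and file-based dictionaries exercised in the hunks above plug into the same loop unchanged, since they now all return an InputIterator.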


@@ -110,6 +110,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       }
     };
     suggester.build(new InputArrayIterator(keys));
+    assertEquals(2, suggester.getCount());
     suggester.close();
     suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@@ -123,6 +124,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
     assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
     assertEquals(10, results.get(0).value);
     assertEquals(new BytesRef("foobaz"), results.get(0).payload);
+    assertEquals(2, suggester.getCount());
     suggester.close();
   }


@@ -912,6 +912,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
         new Input("a c", 4),
     }));
+    assertEquals(3, suggester.getCount());
     List<LookupResult> results = suggester.lookup("a", false, 3);
     assertEquals(3, results.size());
     assertEquals("a", results.get(0).key);
@@ -935,6 +936,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
     suggester.load(is);
     is.close();
+    assertEquals(3, suggester.getCount());
     results = suggester.lookup("a", false, 3);
     assertEquals(3, results.size());
     assertEquals("a", results.get(0).key);


@@ -62,6 +62,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
     Analyzer a = new MockAnalyzer(random());
     FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
     sug.build(new InputArrayIterator(keys));
+    assertEquals(2, sug.getCount());
     for(int i=0;i<2;i++) {
@@ -95,6 +96,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
       sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
       sug.load(is);
       is.close();
+      assertEquals(2, sug.getCount());
     }
   }
@@ -171,6 +173,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
         public boolean hasPayloads() {
          return false;
        }
      });
    if (VERBOSE) {
      System.out.println(sug.sizeInBytes() + " bytes");


@@ -181,6 +181,7 @@ public class FSTCompletionTest extends LuceneTestCase {
     FSTCompletionLookup lookup = new FSTCompletionLookup();
     lookup.build(new InputArrayIterator(input));
+    assertEquals(input.size(), lookup.getCount());
     for (Input tf : input) {
       assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random())));
       assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString());


@@ -156,6 +156,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
     WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
     suggester.build(new InputArrayIterator(keys));
+    assertEquals(numWords, suggester.getCount());
     Random random = new Random(random().nextLong());
     for (String prefix : allPrefixes) {
       final int topN = _TestUtil.nextInt(random, 1, 10);
@@ -215,6 +216,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
     WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
     suggester.build(new InputArrayIterator(new Input[0]));
+    assertEquals(0, suggester.getCount());
     List<LookupResult> result = suggester.lookup("a", false, 20);
     assertTrue(result.isEmpty());
   }
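The suggester hunks above assert the new Lookup.getCount() immediately after build(), and the AnalyzingSuggesterTest and TestFreeTextSuggester hunks re-assert the same value after load(), so the count evidently travels with the stored suggester data. Below is a rough sketch of that round trip, not part of this commit: WFSTCompletionLookup, the in-memory streams, and the made-up terms and weights are all illustrative assumptions.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;

public class GetCountSketch {
  public static void main(String[] args) throws IOException {
    // Three hypothetical entries; any term/weight pairs would do.
    Input[] keys = new Input[] {
        new Input("foo", 50),
        new Input("bar", 10),
        new Input("barbar", 12)
    };

    WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
    suggester.build(new InputArrayIterator(keys));
    System.out.println(suggester.getCount());   // 3: one per entry fed to build()

    // store() should persist the count along with the suggester data
    // (the tests above reload a stored suggester and re-assert the count).
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    suggester.store(os);

    WFSTCompletionLookup loaded = new WFSTCompletionLookup(false);
    loaded.load(new ByteArrayInputStream(os.toByteArray()));
    System.out.println(loaded.getCount());      // 3 again, read back after load()
  }
}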