From b5926da2d67820f5fdf8c8ec9b2e642b74d12070 Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Fri, 7 Feb 2014 20:58:19 +0000 Subject: [PATCH] LUCENE-5404: Add .getCount method to all suggesters (Lookup), persist count metadata on .store(), Dictionary returns InputIterator, Dictionary.getWordIterator renamed to .getEntryIterator git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1565810 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 6 +- .../lucene/search/spell/Dictionary.java | 11 ++-- .../search/spell/HighFrequencyDictionary.java | 2 +- .../lucene/search/spell/LuceneDictionary.java | 7 ++- .../search/spell/PlainTextDictionary.java | 5 +- .../lucene/search/spell/SpellChecker.java | 2 +- .../search/suggest/DocumentDictionary.java | 7 +-- .../DocumentValueSourceDictionary.java | 2 +- .../lucene/search/suggest/FileDictionary.java | 2 +- .../lucene/search/suggest/InputIterator.java | 3 + .../apache/lucene/search/suggest/Lookup.java | 59 +++++++++++++----- .../analyzing/AnalyzingInfixSuggester.java | 22 +++++-- .../suggest/analyzing/AnalyzingSuggester.java | 48 +++++++-------- .../suggest/analyzing/FreeTextSuggester.java | 43 +++++++------ .../suggest/fst/FSTCompletionLookup.java | 49 ++++++++------- .../suggest/fst/WFSTCompletionLookup.java | 39 ++++++------ .../search/suggest/jaspell/JaspellLookup.java | 56 +++++++++-------- .../lucene/search/suggest/tst/TSTLookup.java | 60 +++++++++---------- .../search/spell/TestLuceneDictionary.java | 12 ++-- .../suggest/DocumentDictionaryTest.java | 8 +-- .../DocumentValueSourceDictionaryTest.java | 10 ++-- .../search/suggest/FileDictionaryTest.java | 10 ++-- .../suggest/TestHighFrequencyDictionary.java | 2 +- .../AnalyzingInfixSuggesterTest.java | 2 + .../analyzing/AnalyzingSuggesterTest.java | 2 + .../analyzing/TestFreeTextSuggester.java | 3 + .../search/suggest/fst/FSTCompletionTest.java | 1 + .../suggest/fst/WFSTCompletionTest.java | 2 + 28 files changed, 269 insertions(+), 206 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0f11eccdfef..58b554481a9 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -77,6 +77,10 @@ New Features (missing the term, weight or payload). (Areek Zillur via Mike McCandless) +* LUCENE-5404: Add .getCount method to all suggesters (Lookup); persist count + metadata on .store(); Dictionary returns InputIterator; Dictionary.getWordIterator + renamed to .getEntryIterator. (Areek Zillur) + * SOLR-1871: The RangeMapFloatFunction accepts an arbitrary ValueSource as target and default values. (Chris Harris, shalin) @@ -386,7 +390,7 @@ New Features * LUCENE-5323: Add .sizeInBytes method to all suggesters (Lookup). (Areek Zillur via Mike McCandless) - + * LUCENE-5312: Add BlockJoinSorter, a new Sorter implementation that makes sure to never split up blocks of documents indexed with IndexWriter.addDocuments. (Adrien Grand) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java index d1ed4e732a6..59194b84fd7 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java @@ -17,19 +17,20 @@ package org.apache.lucene.search.spell; */ import java.io.IOException; -import org.apache.lucene.util.BytesRefIterator; + +import org.apache.lucene.search.suggest.InputIterator; /** * A simple interface representing a Dictionary. A Dictionary - * here is just a list of words. + * here is a list of entries, where every entry consists of + * term, weight and payload. * - * */ public interface Dictionary { /** - * Return all words present in the dictionary + * Returns an iterator over all the entries * @return Iterator */ - BytesRefIterator getWordsIterator() throws IOException; + InputIterator getEntryIterator() throws IOException; } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java index 826ba28a73c..0150ed18f5b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java @@ -56,7 +56,7 @@ public class HighFrequencyDictionary implements Dictionary { } @Override - public final BytesRefIterator getWordsIterator() throws IOException { + public final InputIterator getEntryIterator() throws IOException { return new HighFrequencyIterator(); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java index b5d66279d6c..d55e5bbc5d1 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spell; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; @@ -42,12 +43,12 @@ public class LuceneDictionary implements Dictionary { } @Override - public final BytesRefIterator getWordsIterator() throws IOException { + public final InputIterator getEntryIterator() throws IOException { final Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { - return terms.iterator(null); + return new InputIterator.InputIteratorWrapper(terms.iterator(null)); } else { - return BytesRefIterator.EMPTY; + return InputIterator.EMPTY; } } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java index 7071ff7cb28..5e77021af79 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.IOUtils; @@ -66,8 +67,8 @@ public class PlainTextDictionary implements Dictionary { } @Override - public BytesRefIterator getWordsIterator() throws IOException { - return new FileIterator(); + public InputIterator getEntryIterator() throws IOException { + return new InputIterator.InputIteratorWrapper(new FileIterator()); } final class FileIterator implements BytesRefIterator { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java index 6f5f399ef9a..e61a2879114 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java @@ -512,7 +512,7 @@ public class SpellChecker implements java.io.Closeable { boolean isEmpty = termsEnums.isEmpty(); try { - BytesRefIterator iter = dict.getWordsIterator(); + BytesRefIterator iter = dict.getEntryIterator(); BytesRef currentTerm; terms: while ((currentTerm = iter.next()) != null) { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java index d948e20edb8..91fdf24ff56 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java @@ -86,7 +86,7 @@ public class DocumentDictionary implements Dictionary { } @Override - public BytesRefIterator getWordsIterator() throws IOException { + public InputIterator getEntryIterator() throws IOException { return new DocumentInputIterator(payloadField!=null); } @@ -102,7 +102,6 @@ public class DocumentDictionary implements Dictionary { private BytesRef currentPayload = null; private final NumericDocValues weightValues; - /** * Creates an iterator over term, weight and payload fields from the lucene * index. setting withPayload to false, implies an iterator @@ -150,7 +149,7 @@ public class DocumentDictionary implements Dictionary { currentPayload = tempPayload; currentWeight = getWeight(doc, currentDocId); - + return tempTerm; } return null; @@ -165,7 +164,7 @@ public class DocumentDictionary implements Dictionary { public boolean hasPayloads() { return hasPayloads; } - + /** * Returns the value of the weightField for the current document. * Retrieves the value for the weightField if its stored (using doc) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java index cbc5763b6bd..d5f720e9ec6 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java @@ -92,7 +92,7 @@ public class DocumentValueSourceDictionary extends DocumentDictionary { } @Override - public BytesRefIterator getWordsIterator() throws IOException { + public InputIterator getEntryIterator() throws IOException { return new DocumentValueSourceInputIterator(payloadField!=null); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java index 5e5968574d0..28921be46ff 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java @@ -106,7 +106,7 @@ public class FileDictionary implements Dictionary { } @Override - public InputIterator getWordsIterator() { + public InputIterator getEntryIterator() { try { return new FileIterator(); } catch (IOException e) { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java index bda13321012..c98825d4fd6 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java @@ -44,6 +44,9 @@ public interface InputIterator extends BytesRefIterator { /** Returns true if the iterator has payloads */ public boolean hasPayloads(); + /** Singleton InputIterator that iterates over 0 BytesRefs. */ + public static final InputIterator EMPTY = new InputIteratorWrapper(BytesRefIterator.EMPTY); + /** * Wraps a BytesRefIterator as a suggester InputIterator, with all weights * set to 1 and carries no payload diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java index 3b4e09ce4fa..8662d54aec3 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java @@ -24,8 +24,12 @@ import java.util.Comparator; import java.util.List; import org.apache.lucene.search.spell.Dictionary; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.InputStreamDataInput; +import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefIterator; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.PriorityQueue; /** @@ -33,6 +37,7 @@ import org.apache.lucene.util.PriorityQueue; * @lucene.experimental */ public abstract class Lookup { + /** * Result of a lookup. */ @@ -157,21 +162,46 @@ public abstract class Lookup { * {@link UnsortedInputIterator} in such case. */ public void build(Dictionary dict) throws IOException { - BytesRefIterator it = dict.getWordsIterator(); - InputIterator tfit; - if (it instanceof InputIterator) { - tfit = (InputIterator)it; - } else { - tfit = new InputIterator.InputIteratorWrapper(it); - } - build(tfit); + build(dict.getEntryIterator()); } + /** + * Calls {@link #load(DataInput)} after converting + * {@link InputStream} to {@link DataInput} + */ + public boolean load(InputStream input) throws IOException { + DataInput dataIn = new InputStreamDataInput(input); + try { + return load(dataIn); + } finally { + IOUtils.close(input); + } + } + + /** + * Calls {@link #store(DataOutput)} after converting + * {@link OutputStream} to {@link DataOutput} + */ + public boolean store(OutputStream output) throws IOException { + DataOutput dataOut = new OutputStreamDataOutput(output); + try { + return store(dataOut); + } finally { + IOUtils.close(output); + } + } + + /** + * Get the number of entries the lookup was built with + * @return total number of suggester entries + */ + public abstract long getCount(); + /** * Builds up a new internal {@link Lookup} representation based on the given {@link InputIterator}. * The implementation might re-sort the data internally. */ - public abstract void build(InputIterator tfit) throws IOException; + public abstract void build(InputIterator inputIterator) throws IOException; /** * Look up a key and return possible completion for this key. @@ -183,23 +213,22 @@ public abstract class Lookup { */ public abstract List lookup(CharSequence key, boolean onlyMorePopular, int num); - /** * Persist the constructed lookup data to a directory. Optional operation. - * @param output {@link OutputStream} to write the data to. + * @param output {@link DataOutput} to write the data to. * @return true if successful, false if unsuccessful or not supported. * @throws IOException when fatal IO error occurs. */ - public abstract boolean store(OutputStream output) throws IOException; + public abstract boolean store(DataOutput output) throws IOException; /** * Discard current lookup data and load it from a previously saved copy. * Optional operation. - * @param input the {@link InputStream} to load the lookup data. + * @param input the {@link DataInput} to load the lookup data. * @return true if completed successfully, false if unsuccessful or not supported. * @throws IOException when fatal IO error occurs. */ - public abstract boolean load(InputStream input) throws IOException; + public abstract boolean load(DataInput input) throws IOException; /** * Get the size of the underlying lookup implementation in memory diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 0cf421278df..164819b7918 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -20,8 +20,6 @@ package org.apache.lucene.search.suggest.analyzing; import java.io.Closeable; import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.io.StringReader; import java.util.ArrayList; import java.util.HashSet; @@ -68,6 +66,8 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; @@ -107,6 +107,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { private final File indexPath; final int minPrefixChars; private Directory dir; + /** Number of entries the lookup was built with */ + private long count = 0; /** {@link IndexSearcher} used for lookups. */ protected IndexSearcher searcher; @@ -154,12 +156,14 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { if (DirectoryReader.indexExists(dir)) { // Already built; open it: - searcher = new IndexSearcher(DirectoryReader.open(dir)); + IndexReader reader = DirectoryReader.open(dir); + searcher = new IndexSearcher(reader); // This will just be null if app didn't pass payloads to build(): // TODO: maybe just stored fields? they compress... payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads"); weightsDV = MultiDocValues.getNumericValues(searcher.getIndexReader(), "weight"); textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME); + count = reader.numDocs(); assert textDV != null; } } @@ -194,6 +198,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { IndexWriter w2 = null; AtomicReader r = null; boolean success = false; + count = 0; try { Analyzer gramAnalyzer = new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) { @Override @@ -239,7 +244,6 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } else { payloadField = null; } - //long t0 = System.nanoTime(); while ((text = iter.next()) != null) { String textString = text.utf8ToString(); @@ -251,6 +255,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { payloadField.setBytesValue(iter.payload()); } w.addDocument(doc); + count++; } //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec"); @@ -612,12 +617,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } @Override - public boolean store(OutputStream out) { + public boolean store(DataOutput in) throws IOException { return false; } @Override - public boolean load(InputStream out) { + public boolean load(DataInput out) throws IOException { return false; } @@ -637,4 +642,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { public long sizeInBytes() { return RamUsageEstimator.sizeOf(this); } + + @Override + public long getCount() { + return count; + } }; diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index 4278440e219..6b2c1f6bbe1 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -19,8 +19,6 @@ package org.apache.lucene.search.suggest.analyzing; import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -38,8 +36,6 @@ import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.InputStreamDataInput; -import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; @@ -187,6 +183,9 @@ public class AnalyzingSuggester extends Lookup { /** Whether position holes should appear in the automaton. */ private boolean preservePositionIncrements; + /** Number of entries the lookup was built with */ + private long count = 0; + /** * Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean) * AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST | @@ -394,6 +393,7 @@ public class AnalyzingSuggester extends Lookup { TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); boolean success = false; + count = 0; byte buffer[] = new byte[8]; try { ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); @@ -458,6 +458,7 @@ public class AnalyzingSuggester extends Lookup { writer.write(buffer, 0, output.getPosition()); } + count++; } writer.close(); @@ -571,32 +572,24 @@ public class AnalyzingSuggester extends Lookup { } @Override - public boolean store(OutputStream output) throws IOException { - DataOutput dataOut = new OutputStreamDataOutput(output); - try { - if (fst == null) { - return false; - } - - fst.save(dataOut); - dataOut.writeVInt(maxAnalyzedPathsForOneInput); - dataOut.writeByte((byte) (hasPayloads ? 1 : 0)); - } finally { - IOUtils.close(output); + public boolean store(DataOutput output) throws IOException { + output.writeVLong(count); + if (fst == null) { + return false; } + + fst.save(output); + output.writeVInt(maxAnalyzedPathsForOneInput); + output.writeByte((byte) (hasPayloads ? 1 : 0)); return true; } @Override - public boolean load(InputStream input) throws IOException { - DataInput dataIn = new InputStreamDataInput(input); - try { - this.fst = new FST>(dataIn, new PairOutputs(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); - maxAnalyzedPathsForOneInput = dataIn.readVInt(); - hasPayloads = dataIn.readByte() == 1; - } finally { - IOUtils.close(input); - } + public boolean load(DataInput input) throws IOException { + count = input.readVLong(); + this.fst = new FST>(input, new PairOutputs(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); + maxAnalyzedPathsForOneInput = input.readVInt(); + hasPayloads = input.readByte() == 1; return true; } @@ -809,6 +802,11 @@ public class AnalyzingSuggester extends Lookup { throw new RuntimeException(bogus); } } + + @Override + public long getCount() { + return count; + } /** Returns all prefix paths to initialize the search. */ protected List>> getFullPrefixPaths(List>> prefixPaths, diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index e901ef7f16e..db332474c9a 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -22,8 +22,6 @@ package org.apache.lucene.search.suggest.analyzing; // - add pruning of low-freq ngrams? import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; //import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collections; @@ -62,8 +60,6 @@ import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.InputStreamDataInput; -import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; @@ -153,6 +149,10 @@ public class FreeTextSuggester extends Lookup { private final int grams; private final byte separator; + + /** Number of entries the lookup was built with */ + private long count = 0; + /** The default character used to join multiple tokens * into a single ngram token. The input tokens produced * by the analyzer must not contain this character. */ @@ -320,6 +320,7 @@ public class FreeTextSuggester extends Lookup { IndexReader reader = null; boolean success = false; + count = 0; try { while (true) { BytesRef surfaceForm = iterator.next(); @@ -328,6 +329,7 @@ public class FreeTextSuggester extends Lookup { } field.setStringValue(surfaceForm.utf8ToString()); writer.addDocument(doc); + count++; } reader = DirectoryReader.open(writer, false); @@ -397,31 +399,31 @@ public class FreeTextSuggester extends Lookup { } @Override - public boolean store(OutputStream output) throws IOException { - DataOutput out = new OutputStreamDataOutput(output); - CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); - out.writeByte(separator); - out.writeVInt(grams); - out.writeVLong(totTokens); - fst.save(out); + public boolean store(DataOutput output) throws IOException { + CodecUtil.writeHeader(output, CODEC_NAME, VERSION_CURRENT); + output.writeVLong(count); + output.writeByte(separator); + output.writeVInt(grams); + output.writeVLong(totTokens); + fst.save(output); return true; } @Override - public boolean load(InputStream input) throws IOException { - DataInput in = new InputStreamDataInput(input); - CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START); - byte separatorOrig = in.readByte(); + public boolean load(DataInput input) throws IOException { + CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START); + count = input.readVLong(); + byte separatorOrig = input.readByte(); if (separatorOrig != separator) { throw new IllegalStateException("separator=" + separator + " is incorrect: original model was built with separator=" + separatorOrig); } - int gramsOrig = in.readVInt(); + int gramsOrig = input.readVInt(); if (gramsOrig != grams) { throw new IllegalStateException("grams=" + grams + " is incorrect: original model was built with grams=" + gramsOrig); } - totTokens = in.readVLong(); + totTokens = input.readVLong(); - fst = new FST(in, PositiveIntOutputs.getSingleton()); + fst = new FST(input, PositiveIntOutputs.getSingleton()); return true; } @@ -436,6 +438,11 @@ public class FreeTextSuggester extends Lookup { } } + @Override + public long getCount() { + return count; + } + private int countGrams(BytesRef token) { int count = 1; for(int i=0;i= buffer.length) { buffer = ArrayUtil.grow(buffer, spare.length + 4); } output.reset(buffer); - output.writeInt(encodeWeight(tfit.weight())); + output.writeInt(encodeWeight(iterator.weight())); output.writeBytes(spare.bytes, spare.offset, spare.length); writer.write(buffer, 0, output.getPosition()); } @@ -207,6 +213,7 @@ public class FSTCompletionLookup extends Lookup { builder.add(tmp2, bucket); line++; + count++; } // The two FSTCompletions share the same automaton. @@ -264,28 +271,21 @@ public class FSTCompletionLookup extends Lookup { @Override - public synchronized boolean store(OutputStream output) throws IOException { - - try { - if (this.normalCompletion == null || normalCompletion.getFST() == null) - return false; - normalCompletion.getFST().save(new OutputStreamDataOutput(output)); - } finally { - IOUtils.close(output); - } + public synchronized boolean store(DataOutput output) throws IOException { + output.writeVLong(count); + if (this.normalCompletion == null || normalCompletion.getFST() == null) + return false; + normalCompletion.getFST().save(output); return true; } @Override - public synchronized boolean load(InputStream input) throws IOException { - try { - this.higherWeightsCompletion = new FSTCompletion(new FST( - new InputStreamDataInput(input), NoOutputs.getSingleton())); - this.normalCompletion = new FSTCompletion( - higherWeightsCompletion.getFST(), false, exactMatchFirst); - } finally { - IOUtils.close(input); - } + public synchronized boolean load(DataInput input) throws IOException { + count = input.readVLong(); + this.higherWeightsCompletion = new FSTCompletion(new FST( + input, NoOutputs.getSingleton())); + this.normalCompletion = new FSTCompletion( + higherWeightsCompletion.getFST(), false, exactMatchFirst); return true; } @@ -293,4 +293,9 @@ public class FSTCompletionLookup extends Lookup { public long sizeInBytes() { return RamUsageEstimator.sizeOf(this); } + + @Override + public long getCount() { + return count; + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index eaff4049342..ab668413831 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -18,8 +18,6 @@ package org.apache.lucene.search.suggest.fst; */ import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -31,12 +29,11 @@ import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; import org.apache.lucene.search.suggest.SortedInputIterator; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.store.InputStreamDataInput; -import org.apache.lucene.store.OutputStreamDataOutput; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.fst.Builder; @@ -72,6 +69,9 @@ public class WFSTCompletionLookup extends Lookup { */ private final boolean exactFirst; + /** Number of entries the lookup was built with */ + private long count = 0; + /** * Calls {@link #WFSTCompletionLookup(boolean) WFSTCompletionLookup(true)} */ @@ -96,6 +96,7 @@ public class WFSTCompletionLookup extends Lookup { if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } + count = 0; BytesRef scratch = new BytesRef(); InputIterator iter = new WFSTInputIterator(iterator); IntsRef scratchInts = new IntsRef(); @@ -114,31 +115,26 @@ public class WFSTCompletionLookup extends Lookup { Util.toIntsRef(scratch, scratchInts); builder.add(scratchInts, cost); previous.copyBytes(scratch); + count++; } fst = builder.finish(); } @Override - public boolean store(OutputStream output) throws IOException { - try { - if (fst == null) { - return false; - } - fst.save(new OutputStreamDataOutput(output)); - } finally { - IOUtils.close(output); + public boolean store(DataOutput output) throws IOException { + output.writeVLong(count); + if (fst == null) { + return false; } + fst.save(output); return true; } @Override - public boolean load(InputStream input) throws IOException { - try { - this.fst = new FST(new InputStreamDataInput(input), PositiveIntOutputs.getSingleton()); - } finally { - IOUtils.close(input); - } + public boolean load(DataInput input) throws IOException { + count = input.readVLong(); + this.fst = new FST(input, PositiveIntOutputs.getSingleton()); return true; } @@ -293,4 +289,9 @@ public class WFSTCompletionLookup extends Lookup { public long sizeInBytes() { return (fst == null) ? 0 : fst.sizeInBytes(); } + + @Override + public long getCount() { + return count; + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 65c453249de..58207ef8217 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -17,20 +17,17 @@ package org.apache.lucene.search.suggest.jaspell; * limitations under the License. */ -import java.io.DataInputStream; -import java.io.DataOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.ArrayList; import java.util.List; import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.UnicodeUtil; @@ -44,6 +41,9 @@ public class JaspellLookup extends Lookup { JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie(); private boolean usePrefix = true; private int editDistance = 2; + + /** Number of entries the lookup was built with */ + private long count = 0; /** * Creates a new empty trie @@ -52,23 +52,25 @@ public class JaspellLookup extends Lookup { public JaspellLookup() {} @Override - public void build(InputIterator tfit) throws IOException { - if (tfit.hasPayloads()) { + public void build(InputIterator iterator) throws IOException { + if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } + count = 0; trie = new JaspellTernarySearchTrie(); trie.setMatchAlmostDiff(editDistance); BytesRef spare; final CharsRef charsSpare = new CharsRef(); - while ((spare = tfit.next()) != null) { - final long weight = tfit.weight(); + while ((spare = iterator.next()) != null) { + final long weight = iterator.weight(); if (spare.length == 0) { continue; } charsSpare.grow(spare.length); UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare); trie.put(charsSpare.toString(), Long.valueOf(weight)); + count++; } } @@ -131,8 +133,8 @@ public class JaspellLookup extends Lookup { private static final byte HI_KID = 0x04; private static final byte HAS_VALUE = 0x08; - private void readRecursively(DataInputStream in, TSTNode node) throws IOException { - node.splitchar = in.readChar(); + private void readRecursively(DataInput in, TSTNode node) throws IOException { + node.splitchar = in.readString().charAt(0); byte mask = in.readByte(); if ((mask & HAS_VALUE) != 0) { node.data = Long.valueOf(in.readLong()); @@ -154,11 +156,11 @@ public class JaspellLookup extends Lookup { } } - private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException { + private void writeRecursively(DataOutput out, TSTNode node) throws IOException { if (node == null) { return; } - out.writeChar(node.splitchar); + out.writeString(new String(new char[] {node.splitchar}, 0, 1)); byte mask = 0; if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID; if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID; @@ -174,31 +176,22 @@ public class JaspellLookup extends Lookup { } @Override - public boolean store(OutputStream output) throws IOException { + public boolean store(DataOutput output) throws IOException { + output.writeVLong(count); TSTNode root = trie.getRoot(); if (root == null) { // empty tree return false; } - DataOutputStream out = new DataOutputStream(output); - try { - writeRecursively(out, root); - out.flush(); - } finally { - IOUtils.close(out); - } + writeRecursively(output, root); return true; } @Override - public boolean load(InputStream input) throws IOException { - DataInputStream in = new DataInputStream(input); + public boolean load(DataInput input) throws IOException { + count = input.readVLong(); TSTNode root = trie.new TSTNode('\0', null); - try { - readRecursively(in, root); - trie.setRoot(root); - } finally { - IOUtils.close(in); - } + readRecursively(input, root); + trie.setRoot(root); return true; } @@ -207,4 +200,9 @@ public class JaspellLookup extends Lookup { public long sizeInBytes() { return RamUsageEstimator.sizeOf(trie); } + + @Override + public long getCount() { + return count; + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index 59cd5f99df6..95fc389480a 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -17,20 +17,17 @@ package org.apache.lucene.search.suggest.tst; * limitations under the License. */ -import java.io.DataInputStream; -import java.io.DataOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.ArrayList; import java.util.List; import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.SortedInputIterator; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.UnicodeUtil; @@ -43,6 +40,9 @@ import org.apache.lucene.util.UnicodeUtil; public class TSTLookup extends Lookup { TernaryTreeNode root = new TernaryTreeNode(); TSTAutocomplete autocomplete = new TSTAutocomplete(); + + /** Number of entries the lookup was built with */ + private long count = 0; /** * Creates a new TSTLookup with an empty Ternary Search Tree. @@ -51,24 +51,25 @@ public class TSTLookup extends Lookup { public TSTLookup() {} @Override - public void build(InputIterator tfit) throws IOException { - if (tfit.hasPayloads()) { + public void build(InputIterator iterator) throws IOException { + if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } root = new TernaryTreeNode(); // make sure it's sorted and the comparator uses UTF16 sort order - tfit = new SortedInputIterator(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); - + iterator = new SortedInputIterator(iterator, BytesRef.getUTF8SortedAsUTF16Comparator()); + count = 0; ArrayList tokens = new ArrayList(); ArrayList vals = new ArrayList(); BytesRef spare; CharsRef charsSpare = new CharsRef(); - while ((spare = tfit.next()) != null) { + while ((spare = iterator.next()) != null) { charsSpare.grow(spare.length); UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare); tokens.add(charsSpare.toString()); - vals.add(Long.valueOf(tfit.weight())); + vals.add(Long.valueOf(iterator.weight())); + count++; } autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root); } @@ -148,11 +149,11 @@ public class TSTLookup extends Lookup { private static final byte HAS_VALUE = 0x10; // pre-order traversal - private void readRecursively(DataInputStream in, TernaryTreeNode node) throws IOException { - node.splitchar = in.readChar(); + private void readRecursively(DataInput in, TernaryTreeNode node) throws IOException { + node.splitchar = in.readString().charAt(0); byte mask = in.readByte(); if ((mask & HAS_TOKEN) != 0) { - node.token = in.readUTF(); + node.token = in.readString(); } if ((mask & HAS_VALUE) != 0) { node.val = Long.valueOf(in.readLong()); @@ -172,9 +173,9 @@ public class TSTLookup extends Lookup { } // pre-order traversal - private void writeRecursively(DataOutputStream out, TernaryTreeNode node) throws IOException { + private void writeRecursively(DataOutput out, TernaryTreeNode node) throws IOException { // write out the current node - out.writeChar(node.splitchar); + out.writeString(new String(new char[] {node.splitchar}, 0, 1)); // prepare a mask of kids byte mask = 0; if (node.eqKid != null) mask |= EQ_KID; @@ -183,7 +184,7 @@ public class TSTLookup extends Lookup { if (node.token != null) mask |= HAS_TOKEN; if (node.val != null) mask |= HAS_VALUE; out.writeByte(mask); - if (node.token != null) out.writeUTF(node.token); + if (node.token != null) out.writeString(node.token); if (node.val != null) out.writeLong(((Number)node.val).longValue()); // recurse and write kids if (node.loKid != null) { @@ -198,26 +199,17 @@ public class TSTLookup extends Lookup { } @Override - public synchronized boolean store(OutputStream output) throws IOException { - DataOutputStream out = new DataOutputStream(output); - try { - writeRecursively(out, root); - out.flush(); - } finally { - IOUtils.close(output); - } + public synchronized boolean store(DataOutput output) throws IOException { + output.writeVLong(count); + writeRecursively(output, root); return true; } @Override - public synchronized boolean load(InputStream input) throws IOException { - DataInputStream in = new DataInputStream(input); + public synchronized boolean load(DataInput input) throws IOException { + count = input.readVLong(); root = new TernaryTreeNode(); - try { - readRecursively(in, root); - } finally { - IOUtils.close(in); - } + readRecursively(input, root); return true; } @@ -227,4 +219,8 @@ public class TSTLookup extends Lookup { return RamUsageEstimator.sizeOf(autocomplete); } + @Override + public long getCount() { + return count; + } } diff --git a/lucene/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java b/lucene/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java index 35ed90c70c7..48bdb7b33dc 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java @@ -90,7 +90,7 @@ public class TestLuceneDictionary extends LuceneTestCase { indexReader = DirectoryReader.open(store); ld = new LuceneDictionary(indexReader, "nonexistent_field"); - it = ld.getWordsIterator(); + it = ld.getEntryIterator(); assertNull("More elements than expected", spare = it.next()); } finally { @@ -103,7 +103,7 @@ public class TestLuceneDictionary extends LuceneTestCase { indexReader = DirectoryReader.open(store); ld = new LuceneDictionary(indexReader, "aaa"); - it = ld.getWordsIterator(); + it = ld.getEntryIterator(); assertNotNull("First element doesn't exist.", spare = it.next()); assertTrue("First element isn't correct", spare.utf8ToString().equals("foo")); assertNull("More elements than expected", it.next()); @@ -117,7 +117,7 @@ public class TestLuceneDictionary extends LuceneTestCase { indexReader = DirectoryReader.open(store); ld = new LuceneDictionary(indexReader, "contents"); - it = ld.getWordsIterator(); + it = ld.getEntryIterator(); assertNotNull("First element doesn't exist.", spare = it.next()); assertTrue("First element isn't correct", spare.utf8ToString().equals("Jerry")); @@ -126,7 +126,7 @@ public class TestLuceneDictionary extends LuceneTestCase { assertNull("More elements than expected", it.next()); ld = new LuceneDictionary(indexReader, "contents"); - it = ld.getWordsIterator(); + it = ld.getEntryIterator(); int counter = 2; while (it.next() != null) { @@ -145,7 +145,7 @@ public class TestLuceneDictionary extends LuceneTestCase { indexReader = DirectoryReader.open(store); ld = new LuceneDictionary(indexReader, "contents"); - it = ld.getWordsIterator(); + it = ld.getEntryIterator(); // just iterate through words assertEquals("First element isn't correct", "Jerry", it.next().utf8ToString()); @@ -162,7 +162,7 @@ public class TestLuceneDictionary extends LuceneTestCase { indexReader = DirectoryReader.open(store); ld = new LuceneDictionary(indexReader, "zzz"); - it = ld.getWordsIterator(); + it = ld.getEntryIterator(); assertNotNull("First element doesn't exist.", spare = it.next()); assertEquals("First element isn't correct", "bar", spare.utf8ToString()); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java index 9e5d8e85901..c0a5185df50 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java @@ -109,7 +109,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = dictionary.getEntryIterator(); assertNull(inputIterator.next()); assertEquals(inputIterator.weight(), 0); @@ -135,7 +135,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = dictionary.getEntryIterator(); BytesRef f; while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); @@ -170,7 +170,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = dictionary.getEntryIterator(); BytesRef f; while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); @@ -228,7 +228,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { IndexReader ir = DirectoryReader.open(dir); assertEquals(ir.numDocs(), docs.size()); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = dictionary.getEntryIterator(); BytesRef f; while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java index b1eef56fa11..d168fc65cfa 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java @@ -83,7 +83,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator(); assertNull(inputIterator.next()); assertEquals(inputIterator.weight(), 0); @@ -109,7 +109,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase { IndexReader ir = DirectoryReader.open(dir); ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)}; Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator(); BytesRef f; while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); @@ -141,7 +141,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase { IndexReader ir = DirectoryReader.open(dir); ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)}; Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd)); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator(); BytesRef f; while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); @@ -195,7 +195,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase { ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2)}; Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator(); BytesRef f; while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); @@ -226,7 +226,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase { IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getEntryIterator(); BytesRef f; while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/FileDictionaryTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/FileDictionaryTest.java index fbeb6df22a4..665af98d46f 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/FileDictionaryTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/FileDictionaryTest.java @@ -76,7 +76,7 @@ public class FileDictionaryTest extends LuceneTestCase { InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader); List> entries = fileInput.getKey(); - InputIterator inputIter = dictionary.getWordsIterator(); + InputIterator inputIter = dictionary.getEntryIterator(); assertFalse(inputIter.hasPayloads()); BytesRef term; int count = 0; @@ -98,7 +98,7 @@ public class FileDictionaryTest extends LuceneTestCase { InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader); List> entries = fileInput.getKey(); - InputIterator inputIter = dictionary.getWordsIterator(); + InputIterator inputIter = dictionary.getEntryIterator(); assertFalse(inputIter.hasPayloads()); BytesRef term; int count = 0; @@ -120,7 +120,7 @@ public class FileDictionaryTest extends LuceneTestCase { InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader); List> entries = fileInput.getKey(); - InputIterator inputIter = dictionary.getWordsIterator(); + InputIterator inputIter = dictionary.getEntryIterator(); assertTrue(inputIter.hasPayloads()); BytesRef term; int count = 0; @@ -146,7 +146,7 @@ public class FileDictionaryTest extends LuceneTestCase { InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader); List> entries = fileInput.getKey(); - InputIterator inputIter = dictionary.getWordsIterator(); + InputIterator inputIter = dictionary.getEntryIterator(); assertTrue(inputIter.hasPayloads()); BytesRef term; int count = 0; @@ -173,7 +173,7 @@ public class FileDictionaryTest extends LuceneTestCase { InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader, " , "); List> entries = fileInput.getKey(); - InputIterator inputIter = dictionary.getWordsIterator(); + InputIterator inputIter = dictionary.getEntryIterator(); assertTrue(inputIter.hasPayloads()); BytesRef term; int count = 0; diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java index 576d2a55d55..0d2d786f8ae 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java @@ -35,7 +35,7 @@ public class TestHighFrequencyDictionary extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f); - BytesRefIterator tf = dictionary.getWordsIterator(); + BytesRefIterator tf = dictionary.getEntryIterator(); assertNull(tf.next()); dir.close(); } diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java index 6f5fe2c5f67..93842671bbe 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java @@ -110,6 +110,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } }; suggester.build(new InputArrayIterator(keys)); + assertEquals(2, suggester.getCount()); suggester.close(); suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { @@ -123,6 +124,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { assertEquals("a penny saved is a penny earned", results.get(0).key); assertEquals(10, results.get(0).value); assertEquals(new BytesRef("foobaz"), results.get(0).payload); + assertEquals(2, suggester.getCount()); suggester.close(); } diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java index 16ec9999495..13a344aa2dd 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java @@ -912,6 +912,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { new Input("a c", 4), })); + assertEquals(3, suggester.getCount()); List results = suggester.lookup("a", false, 3); assertEquals(3, results.size()); assertEquals("a", results.get(0).key); @@ -935,6 +936,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { suggester.load(is); is.close(); + assertEquals(3, suggester.getCount()); results = suggester.lookup("a", false, 3); assertEquals(3, results.size()); assertEquals("a", results.get(0).key); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java index e500d34a66c..ef8ca62d317 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java @@ -62,6 +62,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); sug.build(new InputArrayIterator(keys)); + assertEquals(2, sug.getCount()); for(int i=0;i<2;i++) { @@ -95,6 +96,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); sug.load(is); is.close(); + assertEquals(2, sug.getCount()); } } @@ -171,6 +173,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { public boolean hasPayloads() { return false; } + }); if (VERBOSE) { System.out.println(sug.sizeInBytes() + " bytes"); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java index de126731f95..02a07b97da5 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java @@ -181,6 +181,7 @@ public class FSTCompletionTest extends LuceneTestCase { FSTCompletionLookup lookup = new FSTCompletionLookup(); lookup.build(new InputArrayIterator(input)); + assertEquals(input.size(), lookup.getCount()); for (Input tf : input) { assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))); assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString()); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java index 56cc62bc7c0..d1c47149642 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java @@ -156,6 +156,7 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); suggester.build(new InputArrayIterator(keys)); + assertEquals(numWords, suggester.getCount()); Random random = new Random(random().nextLong()); for (String prefix : allPrefixes) { final int topN = _TestUtil.nextInt(random, 1, 10); @@ -215,6 +216,7 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); suggester.build(new InputArrayIterator(new Input[0])); + assertEquals(0, suggester.getCount()); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); }