From c1be5650a9b99c2f8d1402c2e921d4036d2c07b1 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 13 Oct 2013 10:26:22 +0000 Subject: [PATCH] LUCENE-5260: cutover all suggesters to TermFreqPayloadIterator git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1531664 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../search/spell/HighFrequencyDictionary.java | 12 +- .../lucene/search/spell/TermFreqIterator.java | 58 ------- .../search/spell/TermFreqPayloadIterator.java | 53 +++++- ...feringTermFreqPayloadIteratorWrapper.java} | 28 +++- .../search/suggest/DocumentDictionary.java | 21 ++- .../lucene/search/suggest/FileDictionary.java | 16 +- .../apache/lucene/search/suggest/Lookup.java | 18 +- ...SortedTermFreqPayloadIteratorWrapper.java} | 62 +++++-- ...sortedTermFreqPayloadIteratorWrapper.java} | 20 ++- .../analyzing/AnalyzingInfixSuggester.java | 16 +- .../suggest/analyzing/AnalyzingSuggester.java | 15 +- .../suggest/analyzing/FreeTextSuggester.java | 7 +- .../suggest/fst/FSTCompletionLookup.java | 11 +- .../suggest/fst/WFSTCompletionLookup.java | 16 +- .../search/suggest/jaspell/JaspellLookup.java | 7 +- .../lucene/search/suggest/tst/TSTLookup.java | 11 +- .../search/suggest/LookupBenchmarkTest.java | 20 +-- .../search/suggest/PersistenceTest.java | 8 +- .../lucene/search/suggest/TermFreq.java | 34 ---- .../search/suggest/TermFreqArrayIterator.java | 60 ------- .../search/suggest/TermFreqPayload.java | 24 ++- .../suggest/TermFreqPayloadArrayIterator.java | 26 ++- .../search/suggest/TestTermFreqIterator.java | 92 ----------- .../suggest/TestTermFreqPayloadIterator.java | 124 ++++++++++++++ .../analyzing/AnalyzingSuggesterTest.java | 154 +++++++++--------- .../suggest/analyzing/FuzzySuggesterTest.java | 144 ++++++++-------- .../analyzing/TestFreeTextSuggester.java | 80 +++++---- .../search/suggest/fst/FSTCompletionTest.java | 32 ++-- .../suggest/fst/WFSTCompletionTest.java | 42 ++--- 30 files changed, 638 insertions(+), 577 
deletions(-) delete mode 100644 lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java rename lucene/suggest/src/java/org/apache/lucene/search/suggest/{BufferingTermFreqIteratorWrapper.java => BufferingTermFreqPayloadIteratorWrapper.java} (71%) rename lucene/suggest/src/java/org/apache/lucene/search/suggest/{SortedTermFreqIteratorWrapper.java => SortedTermFreqPayloadIteratorWrapper.java} (70%) rename lucene/suggest/src/java/org/apache/lucene/search/suggest/{UnsortedTermFreqIteratorWrapper.java => UnsortedTermFreqPayloadIteratorWrapper.java} (74%) delete mode 100644 lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java delete mode 100644 lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java delete mode 100644 lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java create mode 100644 lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d65c5c1fe17..789061e1249 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -150,6 +150,10 @@ API Changes: numBits parameter to allow growing/shrinking the copied bitset. You can use FixedBitSet.clone() if you only need to clone the bitset. (Shai Erera) +* LUCENE-5260: Use TermFreqPayloadIterator for all suggesters; those + suggesters that can't support payloads will throw an exception if + hasPayloads() is true. 
(Areek Zillur via Mike McCandless) + Optimizations * LUCENE-5225: The ToParentBlockJoinQuery only keeps tracks of the the child diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java index 187e3271621..5882fdfce9d 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java @@ -59,7 +59,7 @@ public class HighFrequencyDictionary implements Dictionary { return new HighFrequencyIterator(); } - final class HighFrequencyIterator implements TermFreqIterator { + final class HighFrequencyIterator implements TermFreqPayloadIterator { private final BytesRef spare = new BytesRef(); private final TermsEnum termsEnum; private int minNumDocs; @@ -98,5 +98,15 @@ public class HighFrequencyDictionary implements Dictionary { } return null; } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java deleted file mode 100644 index d7ce627b522..00000000000 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.apache.lucene.search.spell; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefIterator; - -/** - * Interface for enumerating term,weight pairs. - */ -public interface TermFreqIterator extends BytesRefIterator { - - /** A term's weight, higher numbers mean better suggestions. */ - public long weight(); - - /** - * Wraps a BytesRefIterator as a TermFreqIterator, with all weights - * set to 1 - */ - public static class TermFreqIteratorWrapper implements TermFreqIterator { - private BytesRefIterator wrapped; - - /** - * Creates a new wrapper, wrapping the specified iterator and - * specifying a weight value of 1 for all terms. - */ - public TermFreqIteratorWrapper(BytesRefIterator wrapped) { - this.wrapped = wrapped; - } - - @Override - public long weight() { - return 1; - } - - @Override - public BytesRef next() throws IOException { - return wrapped.next(); - } - } -} diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java index 5d3a59b416a..e780db4fb6c 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java @@ -17,20 +17,67 @@ package org.apache.lucene.search.spell; * limitations under the License. 
*/ +import java.io.IOException; + import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs +import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; /** * Interface for enumerating term,weight,payload triples; - * currently only {@link AnalyzingSuggester} and {@link - * FuzzySuggester} support payloads. + * currently only {@link AnalyzingSuggester}, {@link + * FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads. */ -public interface TermFreqPayloadIterator extends TermFreqIterator { +public interface TermFreqPayloadIterator extends BytesRefIterator { + /** A term's weight, higher numbers mean better suggestions. */ + public long weight(); + /** An arbitrary byte[] to record per suggestion. See * {@link LookupResult#payload} to retrieve the payload * for each suggestion. */ public BytesRef payload(); + + /** Returns true if the iterator has payloads */ + public boolean hasPayloads(); + + /** + * Wraps a BytesRefIterator as a TermFreqPayloadIterator, with all weights + * set to 1 and carries no payload + */ + public static class TermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { + private final BytesRefIterator wrapped; + + /** + * Creates a new wrapper, wrapping the specified iterator and + * specifying a weight value of 1 for all terms + * and nullifies associated payloads. 
+ */ + public TermFreqPayloadIteratorWrapper(BytesRefIterator wrapped) { + this.wrapped = wrapped; + } + + @Override + public long weight() { + return 1; + } + + @Override + public BytesRef next() throws IOException { + return wrapped.next(); + } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java similarity index 71% rename from lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java rename to lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java index 6228667285d..b78ec0e2647 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java @@ -18,7 +18,8 @@ package org.apache.lucene.search.suggest; */ import java.io.IOException; -import org.apache.lucene.search.spell.TermFreqIterator; + +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; @@ -27,22 +28,30 @@ import org.apache.lucene.util.Counter; * This wrapper buffers incoming elements. 
* @lucene.experimental */ -public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { +public class BufferingTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { // TODO keep this for now /** buffered term entries */ protected BytesRefArray entries = new BytesRefArray(Counter.newCounter()); + /** buffered payload entries */ + protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter()); /** current buffer position */ protected int curPos = -1; /** buffered weights, parallel with {@link #entries} */ protected long[] freqs = new long[1]; private final BytesRef spare = new BytesRef(); + private final BytesRef payloadSpare = new BytesRef(); + private final boolean hasPayloads; /** Creates a new iterator, buffering entries from the specified iterator */ - public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { + public BufferingTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { BytesRef spare; int freqIndex = 0; + hasPayloads = source.hasPayloads(); while((spare = source.next()) != null) { entries.append(spare); + if (hasPayloads) { + payloads.append(source.payload()); + } if (freqIndex >= freqs.length) { freqs = ArrayUtil.grow(freqs, freqs.length+1); } @@ -64,4 +73,17 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { } return null; } + + @Override + public BytesRef payload() { + if (hasPayloads && curPos < payloads.size()) { + return payloads.get(payloadSpare, curPos); + } + return null; + } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java index 35199613512..425d9dbbb5e 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java +++ 
b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java @@ -47,12 +47,6 @@ import org.apache.lucene.util.BytesRefIterator; * The term, weight and (optionally) payload fields supplied * are required for ALL documents and has to be stored * - *
  • - * This Dictionary implementation is not compatible with the following Suggesters: - * {@link JaspellLookup}, {@link TSTLookup}, {@link FSTCompletionLookup}, - * {@link WFSTCompletionLookup} and {@link AnalyzingInfixSuggester}. - * see https://issues.apache.org/jira/browse/LUCENE-5260 - *
  • * */ public class DocumentDictionary implements Dictionary { @@ -95,7 +89,7 @@ public class DocumentDictionary implements Dictionary { final class TermWeightPayloadIterator implements TermFreqPayloadIterator { private final int docCount; private final Set relevantFields; - private final boolean withPayload; + private final boolean hasPayloads; private final Bits liveDocs; private int currentDocId = -1; private long currentWeight; @@ -106,13 +100,13 @@ public class DocumentDictionary implements Dictionary { * index. setting withPayload to false, implies an iterator * over only term and weight. */ - public TermWeightPayloadIterator(boolean withPayload) throws IOException { + public TermWeightPayloadIterator(boolean hasPayloads) throws IOException { docCount = reader.maxDoc() - 1; - this.withPayload = withPayload; + this.hasPayloads = hasPayloads; currentPayload = null; liveDocs = MultiFields.getLiveDocs(reader); List relevantFieldList; - if(withPayload) { + if(hasPayloads) { relevantFieldList = Arrays.asList(field, weightField, payloadField); } else { relevantFieldList = Arrays.asList(field, weightField); @@ -135,7 +129,7 @@ public class DocumentDictionary implements Dictionary { StoredDocument doc = reader.document(currentDocId, relevantFields); - if (withPayload) { + if (hasPayloads) { StorableField payload = doc.getField(payloadField); if (payload == null) { throw new IllegalArgumentException(payloadField + " does not exist"); @@ -169,6 +163,11 @@ public class DocumentDictionary implements Dictionary { public BytesRef payload() { return currentPayload; } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java index fa242ef89f7..b03033b4771 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java +++ 
b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java @@ -21,7 +21,7 @@ package org.apache.lucene.search.suggest; import java.io.*; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; @@ -57,11 +57,11 @@ public class FileDictionary implements Dictionary { } @Override - public TermFreqIterator getWordsIterator() { + public TermFreqPayloadIterator getWordsIterator() { return new FileIterator(); } - final class FileIterator implements TermFreqIterator { + final class FileIterator implements TermFreqPayloadIterator { private long curFreq; private final BytesRef spare = new BytesRef(); @@ -98,5 +98,15 @@ public class FileDictionary implements Dictionary { return null; } } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java index a1c64d373f0..edee62ba21e 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java @@ -24,7 +24,7 @@ import java.util.Comparator; import java.util.List; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.PriorityQueue; @@ -154,25 +154,25 @@ public abstract class Lookup { /** Build lookup from a dictionary. 
Some implementations may require sorted * or unsorted keys from the dictionary's iterator - use - * {@link SortedTermFreqIteratorWrapper} or - * {@link UnsortedTermFreqIteratorWrapper} in such case. + * {@link SortedTermFreqPayloadIteratorWrapper} or + * {@link UnsortedTermFreqPayloadIteratorWrapper} in such case. */ public void build(Dictionary dict) throws IOException { BytesRefIterator it = dict.getWordsIterator(); - TermFreqIterator tfit; - if (it instanceof TermFreqIterator) { - tfit = (TermFreqIterator)it; + TermFreqPayloadIterator tfit; + if (it instanceof TermFreqPayloadIterator) { + tfit = (TermFreqPayloadIterator)it; } else { - tfit = new TermFreqIterator.TermFreqIteratorWrapper(it); + tfit = new TermFreqPayloadIterator.TermFreqPayloadIteratorWrapper(it); } build(tfit); } /** - * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}. + * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqPayloadIterator}. * The implementation might re-sort the data internally. */ - public abstract void build(TermFreqIterator tfit) throws IOException; + public abstract void build(TermFreqPayloadIterator tfit) throws IOException; /** * Look up a key and return possible completion for this key. 
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java similarity index 70% rename from lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java rename to lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java index 53c4212ac44..b8fa103a90c 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java @@ -21,7 +21,7 @@ import java.io.File; import java.io.IOException; import java.util.Comparator; -import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Sort.ByteSequencesReader; import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; import org.apache.lucene.store.ByteArrayDataInput; @@ -34,23 +34,25 @@ import org.apache.lucene.util.IOUtils; * This wrapper buffers incoming elements and makes sure they are sorted based on given comparator. * @lucene.experimental */ -public class SortedTermFreqIteratorWrapper implements TermFreqIterator { +public class SortedTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { - private final TermFreqIterator source; + private final TermFreqPayloadIterator source; private File tempInput; private File tempSorted; private final ByteSequencesReader reader; private final Comparator comparator; + private final boolean hasPayloads; private boolean done = false; private long weight; private final BytesRef scratch = new BytesRef(); + private BytesRef payload = new BytesRef(); /** * Creates a new sorted wrapper, using {@link * BytesRef#getUTF8SortedAsUnicodeComparator} for * sorting. 
*/ - public SortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { + public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { this(source, BytesRef.getUTF8SortedAsUnicodeComparator()); } @@ -58,7 +60,8 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator { * Creates a new sorted wrapper, sorting by BytesRef * (ascending) then cost (ascending). */ - public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator comparator) throws IOException { + public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source, Comparator comparator) throws IOException { + this.hasPayloads = source.hasPayloads(); this.source = source; this.comparator = comparator; this.reader = sort(); @@ -74,6 +77,9 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator { ByteArrayDataInput input = new ByteArrayDataInput(); if (reader.read(scratch)) { weight = decode(scratch, input); + if (hasPayloads) { + payload = decodePayload(scratch, input); + } success = true; return scratch; } @@ -93,6 +99,19 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator { return weight; } + @Override + public BytesRef payload() { + if (hasPayloads) { + return payload; + } + return null; + } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } + /** Sortes by BytesRef (ascending) then cost (ascending). 
*/ private final Comparator tieBreakByCostComparator = new Comparator() { @@ -111,6 +130,10 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator { rightScratch.length = right.length; long leftCost = decode(leftScratch, input); long rightCost = decode(rightScratch, input); + if (hasPayloads) { + decodePayload(leftScratch, input); + decodePayload(rightScratch, input); + } int cmp = comparator.compare(leftScratch, rightScratch); if (cmp != 0) { return cmp; @@ -133,7 +156,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator { ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); while ((spare = source.next()) != null) { - encode(writer, output, buffer, spare, source.weight()); + encode(writer, output, buffer, spare, source.payload(), source.weight()); } writer.close(); new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted); @@ -164,13 +187,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator { } } - /** encodes an entry (bytes+weight) to the provided writer */ - protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException { - if (spare.length + 8 >= buffer.length) { - buffer = ArrayUtil.grow(buffer, spare.length + 8); + /** encodes an entry (bytes+(payload)+weight) to the provided writer */ + protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException { + int requiredLength = spare.length + 8 + ((hasPayloads) ? 
2 + payload.length : 0); + if (requiredLength >= buffer.length) { + buffer = ArrayUtil.grow(buffer, requiredLength); } output.reset(buffer); output.writeBytes(spare.bytes, spare.offset, spare.length); + if (hasPayloads) { + output.writeBytes(payload.bytes, payload.offset, payload.length); + output.writeShort((short) payload.length); + } output.writeLong(weight); writer.write(buffer, 0, output.getPosition()); } @@ -182,4 +210,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator { scratch.length -= 8; // long return tmpInput.readLong(); } + + /** decodes the payload at the current position */ + protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) { + tmpInput.reset(scratch.bytes); + tmpInput.skipBytes(scratch.length - 2); // skip to payload size + short payloadLength = tmpInput.readShort(); // read payload size + tmpInput.setPosition(scratch.length - 2 - payloadLength); // setPosition to start of payload + BytesRef payloadScratch = new BytesRef(payloadLength); + tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload + payloadScratch.length = payloadLength; + scratch.length -= 2; // payload length info (short) + scratch.length -= payloadLength; // payload + return payloadScratch; + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java similarity index 74% rename from lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java rename to lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java index c242195656e..8aad73b2e2b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java @@ -20,7 +20,7 @@ package 
org.apache.lucene.search.suggest; import java.io.IOException; import java.util.Random; -import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.BytesRef; /** @@ -28,16 +28,17 @@ import org.apache.lucene.util.BytesRef; * random order. * @lucene.experimental */ -public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper { +public class UnsortedTermFreqPayloadIteratorWrapper extends BufferingTermFreqPayloadIteratorWrapper { // TODO keep this for now private final int[] ords; private int currentOrd = -1; private final BytesRef spare = new BytesRef(); + private final BytesRef payloadSpare = new BytesRef(); /** * Creates a new iterator, wrapping the specified iterator and * returning elements in a random order. */ - public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { + public UnsortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { super(source); ords = new int[entries.size()]; Random random = new Random(); @@ -54,13 +55,24 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr @Override public long weight() { + assert currentOrd == ords[curPos]; return freqs[currentOrd]; } @Override public BytesRef next() throws IOException { if (++curPos < entries.size()) { - return entries.get(spare, (currentOrd = ords[curPos])); + currentOrd = ords[curPos]; + return entries.get(spare, currentOrd); + } + return null; + } + + @Override + public BytesRef payload() { + if (hasPayloads() && curPos < payloads.size()) { + assert currentOrd == ords[curPos]; + return payloads.get(payloadSpare, currentOrd); } return null; } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 632023d1a82..27d73b8a4dc 100644 --- 
a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -65,7 +65,6 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs import org.apache.lucene.search.suggest.Lookup; @@ -176,19 +175,14 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } @Override - public void build(TermFreqIterator iter) throws IOException { + public void build(TermFreqPayloadIterator iter) throws IOException { if (searcher != null) { searcher.getIndexReader().close(); searcher = null; } - TermFreqPayloadIterator payloads; - if (iter instanceof TermFreqPayloadIterator) { - payloads = (TermFreqPayloadIterator) iter; - } else { - payloads = null; - } + Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp")); IndexWriter w = null; @@ -236,7 +230,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { doc.add(weightField); Field payloadField; - if (payloads != null) { + if (iter.hasPayloads()) { payloadField = new BinaryDocValuesField("payloads", new BytesRef()); doc.add(payloadField); } else { @@ -250,8 +244,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { textGramField.setStringValue(textString); textDVField.setBytesValue(text); weightField.setLongValue(iter.weight()); - if (payloads != null) { - payloadField.setBytesValue(payloads.payload()); + if (iter.hasPayloads()) { + payloadField.setBytesValue(iter.payload()); } w.addDocument(doc); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java 
b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index 77f0f1c573f..0b6ff71eecf 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -31,7 +31,6 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStreamToAutomaton; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort; @@ -381,19 +380,13 @@ public class AnalyzingSuggester extends Lookup { } @Override - public void build(TermFreqIterator iterator) throws IOException { + public void build(TermFreqPayloadIterator iterator) throws IOException { String prefix = getClass().getSimpleName(); File directory = Sort.defaultTempDir(); File tempInput = File.createTempFile(prefix, ".input", directory); File tempSorted = File.createTempFile(prefix, ".sorted", directory); - TermFreqPayloadIterator payloads; - if (iterator instanceof TermFreqPayloadIterator) { - payloads = (TermFreqPayloadIterator) iterator; - } else { - payloads = null; - } - hasPayloads = payloads != null; + hasPayloads = iterator.hasPayloads(); Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput); Sort.ByteSequencesReader reader = null; @@ -432,7 +425,7 @@ public class AnalyzingSuggester extends Lookup { if (surfaceForm.length > (Short.MAX_VALUE-2)) { throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")"); } - payload = payloads.payload(); + payload = iterator.payload(); // payload + surfaceLength (short) requiredLength += payload.length + 2; } else { @@ -470,7 +463,7 @@ public class AnalyzingSuggester extends 
Lookup { writer.close(); // Sort all input/output pairs (required by FST.Builder): - new Sort(new AnalyzingComparator(payloads != null)).sort(tempInput, tempSorted); + new Sort(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted); // Free disk space: tempInput.delete(); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index d2f652df10d..cee929bca71 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -54,7 +54,6 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort; @@ -274,15 +273,15 @@ public class FreeTextSuggester extends Lookup { } @Override - public void build(TermFreqIterator iterator) throws IOException { + public void build(TermFreqPayloadIterator iterator) throws IOException { build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); } /** Build the suggest index, using up to the specified * amount of temporary RAM while building. Note that * the weights for the suggestions are ignored. 
*/ - public void build(TermFreqIterator iterator, double ramBufferSizeMB) throws IOException { - if (iterator instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator iterator, double ramBufferSizeMB) throws IOException { + if (iterator.hasPayloads()) { throw new IllegalArgumentException("payloads are not supported"); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 2f4fe0537a3..2bc0aec2db8 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -24,7 +24,6 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort.SortInfo; @@ -43,7 +42,7 @@ import org.apache.lucene.util.fst.NoOutputs; * An adapter from {@link Lookup} API to {@link FSTCompletion}. * *

    This adapter differs from {@link FSTCompletion} in that it attempts - * to discretize any "weights" as passed from in {@link TermFreqIterator#weight()} + * to discretize any "weights" as passed in from {@link TermFreqPayloadIterator#weight()} * to match the number of buckets. For the rationale for bucketing, see * {@link FSTCompletion}. * @@ -96,7 +95,7 @@ public class FSTCompletionLookup extends Lookup { /** * This constructor prepares for creating a suggested FST using the - * {@link #build(TermFreqIterator)} method. The number of weight + * {@link #build(TermFreqPayloadIterator)} method. The number of weight * discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and * exact matches are promoted to the top of the suggestions list. */ @@ -106,7 +105,7 @@ public class FSTCompletionLookup extends Lookup { /** * This constructor prepares for creating a suggested FST using the - * {@link #build(TermFreqIterator)} method. + * {@link #build(TermFreqPayloadIterator)} method. 
* * @param buckets * The number of weight discretization buckets (see @@ -141,8 +140,8 @@ public class FSTCompletionLookup extends Lookup { } @Override - public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator tfit) throws IOException { + if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } File tempInput = File.createTempFile( diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index f634bee80ed..982cab5e077 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -25,11 +25,10 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; -import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; +import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.InputStreamDataInput; @@ -93,12 +92,12 @@ public class WFSTCompletionLookup extends Lookup { } @Override - public void build(TermFreqIterator iterator) throws IOException { - if (iterator instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator iterator) throws IOException { + if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } BytesRef scratch = new BytesRef(); - TermFreqIterator iter = new 
WFSTTermFreqIteratorWrapper(iterator); + TermFreqPayloadIterator iter = new WFSTTermFreqIteratorWrapper(iterator); IntsRef scratchInts = new IntsRef(); BytesRef previous = null; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); @@ -255,14 +254,15 @@ public class WFSTCompletionLookup extends Lookup { return Integer.MAX_VALUE - (int)value; } - private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqIteratorWrapper { + private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqPayloadIteratorWrapper { - WFSTTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { + WFSTTermFreqIteratorWrapper(TermFreqPayloadIterator source) throws IOException { super(source); + assert source.hasPayloads() == false; } @Override - protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException { + protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException { if (spare.length + 4 >= buffer.length) { buffer = ArrayUtil.grow(buffer, spare.length + 4); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 558e115440e..83ac51279f4 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -25,7 +25,6 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; @@ -47,13 +46,13 @@ public class JaspellLookup extends Lookup { /** * Creates a 
new empty trie - * @see #build(TermFreqIterator) + * @see #build(TermFreqPayloadIterator) * */ public JaspellLookup() {} @Override - public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator tfit) throws IOException { + if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } trie = new JaspellTernarySearchTrie(); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index 852ebb56c9a..6eb173c7d93 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -25,10 +25,9 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; +import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; @@ -46,19 +45,19 @@ public class TSTLookup extends Lookup { /** * Creates a new TSTLookup with an empty Ternary Search Tree. 
- * @see #build(TermFreqIterator) + * @see #build(TermFreqPayloadIterator) */ public TSTLookup() {} @Override - public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator tfit) throws IOException { + if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } root = new TernaryTreeNode(); // make sure it's sorted and the comparator uses UTF16 sort order - tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); + tfit = new SortedTermFreqPayloadIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); ArrayList tokens = new ArrayList(); ArrayList vals = new ArrayList(); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java index f71318c60ca..f57d5d36a15 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java @@ -72,12 +72,12 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Input term/weight pairs. */ - private static TermFreq [] dictionaryInput; + private static TermFreqPayload [] dictionaryInput; /** * Benchmark term/weight pairs (randomized order). */ - private static List benchmarkInput; + private static List benchmarkInput; /** * Loads terms and frequencies from Wikipedia (cached). 
@@ -85,9 +85,9 @@ public class LookupBenchmarkTest extends LuceneTestCase { @BeforeClass public static void setup() throws Exception { assert false : "disable assertions before running benchmarks!"; - List input = readTop50KWiki(); + List input = readTop50KWiki(); Collections.shuffle(input, random); - LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreq [input.size()]); + LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreqPayload [input.size()]); Collections.shuffle(input, random); LookupBenchmarkTest.benchmarkInput = input; } @@ -97,8 +97,8 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Collect the multilingual input for benchmarks/ tests. */ - public static List readTop50KWiki() throws Exception { - List input = new ArrayList(); + public static List readTop50KWiki() throws Exception { + List input = new ArrayList(); URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8"); assert resource != null : "Resource missing: Top50KWiki.utf8"; @@ -109,7 +109,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { assertTrue("No | separator?: " + line, tab >= 0); int weight = Integer.parseInt(line.substring(tab + 1)); String key = line.substring(0, tab); - input.add(new TermFreq(key, weight)); + input.add(new TermFreqPayload(key, weight)); } br.close(); return input; @@ -163,7 +163,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Create {@link Lookup} instance and populate it. 
*/ - private Lookup buildLookup(Class cls, TermFreq[] input) throws Exception { + private Lookup buildLookup(Class cls, TermFreqPayload[] input) throws Exception { Lookup lookup = null; try { lookup = cls.newInstance(); @@ -176,7 +176,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { lookup = ctor.newInstance(a); } } - lookup.build(new TermFreqArrayIterator(input)); + lookup.build(new TermFreqPayloadArrayIterator(input)); return lookup; } @@ -220,7 +220,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { final Lookup lookup = buildLookup(cls, dictionaryInput); final List input = new ArrayList(benchmarkInput.size()); - for (TermFreq tf : benchmarkInput) { + for (TermFreqPayload tf : benchmarkInput) { String s = tf.term.utf8ToString(); String sub = s.substring(0, Math.min(s.length(), minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java index ef948a8b510..24398573db1 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java @@ -65,10 +65,10 @@ public class PersistenceTest extends LuceneTestCase { // Add all input keys. Lookup lookup = lookupClass.newInstance(); - TermFreq[] keys = new TermFreq[this.keys.length]; + TermFreqPayload[] keys = new TermFreqPayload[this.keys.length]; for (int i = 0; i < keys.length; i++) - keys[i] = new TermFreq(this.keys[i], i); - lookup.build(new TermFreqArrayIterator(keys)); + keys[i] = new TermFreqPayload(this.keys[i], i); + lookup.build(new TermFreqPayloadArrayIterator(keys)); // Store the suggester. File storeDir = TEMP_DIR; @@ -81,7 +81,7 @@ public class PersistenceTest extends LuceneTestCase { // Assert validity. 
Random random = random(); long previous = Long.MIN_VALUE; - for (TermFreq k : keys) { + for (TermFreqPayload k : keys) { List list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1); assertEquals(1, list.size()); LookupResult lookupResult = list.get(0); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java deleted file mode 100644 index 2b02ac12e1e..00000000000 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.apache.lucene.search.suggest; - -import org.apache.lucene.util.BytesRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -public final class TermFreq { - public final BytesRef term; - public final long v; - - public TermFreq(String term, long v) { - this(new BytesRef(term), v); - } - - public TermFreq(BytesRef term, long v) { - this.term = term; - this.v = v; - } -} \ No newline at end of file diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java deleted file mode 100644 index d77fa5cfca9..00000000000 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Arrays; -import java.util.Iterator; - -import org.apache.lucene.search.spell.TermFreqIterator; -import org.apache.lucene.util.BytesRef; - -/** - * A {@link TermFreqIterator} over a sequence of {@link TermFreq}s. 
- */ -public final class TermFreqArrayIterator implements TermFreqIterator { - private final Iterator i; - private TermFreq current; - private final BytesRef spare = new BytesRef(); - - public TermFreqArrayIterator(Iterator i) { - this.i = i; - } - - public TermFreqArrayIterator(TermFreq [] i) { - this(Arrays.asList(i)); - } - - public TermFreqArrayIterator(Iterable i) { - this(i.iterator()); - } - - @Override - public long weight() { - return current.v; - } - - @Override - public BytesRef next() { - if (i.hasNext()) { - current = i.next(); - spare.copyBytes(current.term); - return spare; - } - return null; - } -} \ No newline at end of file diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java index 7640281bffe..5463a1376d6 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java @@ -23,14 +23,32 @@ public final class TermFreqPayload { public final BytesRef term; public final long v; public final BytesRef payload; + public final boolean hasPayloads; - public TermFreqPayload(String term, long v, BytesRef payload) { - this(new BytesRef(term), v, payload); + public TermFreqPayload(BytesRef term, long v, BytesRef payload) { + this(term, v, payload, true); } - public TermFreqPayload(BytesRef term, long v, BytesRef payload) { + public TermFreqPayload(String term, long v, BytesRef payload) { + this(new BytesRef(term), v, payload, true); + } + + public TermFreqPayload(BytesRef term, long v) { + this(term, v, null, false); + } + + public TermFreqPayload(String term, long v) { + this(new BytesRef(term), v, null, false); + } + + public TermFreqPayload(BytesRef term, long v, BytesRef payload, boolean hasPayloads) { this.term = term; this.v = v; this.payload = payload; + this.hasPayloads = hasPayloads; + } + + public boolean hasPayloads() { + 
return hasPayloads; } } \ No newline at end of file diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java index 5bfb073251b..6583f73326c 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java @@ -20,26 +20,33 @@ package org.apache.lucene.search.suggest; import java.util.Arrays; import java.util.Iterator; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.BytesRef; /** - * A {@link TermFreqIterator} over a sequence of {@link TermFreq}s. + * A {@link TermFreqPayloadIterator} over a sequence of {@link TermFreqPayload}s. */ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterator { private final Iterator i; + private final boolean hasPayloads; + private boolean first; private TermFreqPayload current; private final BytesRef spare = new BytesRef(); public TermFreqPayloadArrayIterator(Iterator i) { this.i = i; + if (i.hasNext()) { + current = i.next(); + first = true; + this.hasPayloads = current.hasPayloads; + } else { + this.hasPayloads = false; + } } public TermFreqPayloadArrayIterator(TermFreqPayload[] i) { this(Arrays.asList(i)); } - public TermFreqPayloadArrayIterator(Iterable i) { this(i.iterator()); } @@ -51,8 +58,12 @@ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterat @Override public BytesRef next() { - if (i.hasNext()) { - current = i.next(); + if (i.hasNext() || (first && current!=null)) { + if (first) { + first = false; + } else { + current = i.next(); + } spare.copyBytes(current.term); return spare; } @@ -63,4 +74,9 @@ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterat public BytesRef 
payload() { return current.payload; } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } } \ No newline at end of file diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java deleted file mode 100644 index 3209b1ada84..00000000000 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java +++ /dev/null @@ -1,92 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ - -import java.util.Comparator; -import java.util.Iterator; -import java.util.Map; -import java.util.Random; -import java.util.TreeMap; - -import org.apache.lucene.search.spell.TermFreqIterator; -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefHash; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; - -public class TestTermFreqIterator extends LuceneTestCase { - public void testEmpty() throws Exception { - TermFreqArrayIterator iterator = new TermFreqArrayIterator(new TermFreq[0]); - TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator()); - assertNull(wrapper.next()); - wrapper = new UnsortedTermFreqIteratorWrapper(iterator); - assertNull(wrapper.next()); - } - - public void testTerms() throws Exception { - Random random = random(); - int num = atLeast(10000); - - Comparator comparator = random.nextBoolean() ? 
BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator(); - TreeMap sorted = new TreeMap(comparator); - TermFreq[] unsorted = new TermFreq[num]; - - for (int i = 0; i < num; i++) { - BytesRef key; - do { - key = new BytesRef(_TestUtil.randomUnicodeString(random)); - } while (sorted.containsKey(key)); - long value = random.nextLong(); - sorted.put(key, value); - unsorted[i] = new TermFreq(key, value); - } - - // test the sorted iterator wrapper - TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator); - Iterator> expected = sorted.entrySet().iterator(); - while (expected.hasNext()) { - Map.Entry entry = expected.next(); - - assertEquals(entry.getKey(), wrapper.next()); - assertEquals(entry.getValue().longValue(), wrapper.weight()); - } - assertNull(wrapper.next()); - - // test the unsorted iterator wrapper - wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted)); - TreeMap actual = new TreeMap(); - BytesRef key; - while ((key = wrapper.next()) != null) { - long value = wrapper.weight(); - actual.put(BytesRef.deepCopyOf(key), value); - } - assertEquals(sorted, actual); - } - - public static long asLong(BytesRef b) { - return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b, - b.offset + 4) & 0xFFFFFFFFL); - } - - private static int asIntInternal(BytesRef b, int pos) { - return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16) - | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF); - } -} diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java new file mode 100644 index 00000000000..e7d82572a3c --- /dev/null +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java @@ -0,0 +1,124 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.util.AbstractMap.SimpleEntry; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.TreeMap; + +import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestTermFreqPayloadIterator extends LuceneTestCase { + + public void testEmpty() throws Exception { + TermFreqPayloadArrayIterator iterator = new TermFreqPayloadArrayIterator(new TermFreqPayload[0]); + TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator()); + assertNull(wrapper.next()); + wrapper = new UnsortedTermFreqPayloadIteratorWrapper(iterator); + assertNull(wrapper.next()); + } + + public void testTerms() throws Exception { + Random random = random(); + int num = atLeast(10000); + + Comparator comparator = random.nextBoolean() ? 
BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator(); + TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> sorted = new TreeMap<>(comparator); + TreeMap<BytesRef, Long> sortedWithoutPayload = new TreeMap<>(comparator); + TermFreqPayload[] unsorted = new TermFreqPayload[num]; + TermFreqPayload[] unsortedWithoutPayload = new TermFreqPayload[num]; + + for (int i = 0; i < num; i++) { + BytesRef key; + BytesRef payload; + do { + key = new BytesRef(_TestUtil.randomUnicodeString(random)); + payload = new BytesRef(_TestUtil.randomUnicodeString(random)); + } while (sorted.containsKey(key)); + long value = random.nextLong(); + sortedWithoutPayload.put(key, value); + sorted.put(key, new SimpleEntry<>(value, payload)); + unsorted[i] = new TermFreqPayload(key, value, payload); + unsortedWithoutPayload[i] = new TermFreqPayload(key, value); + } + + // test the sorted iterator wrapper with payloads + TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted), comparator); + Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator(); + while (expected.hasNext()) { + Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>> entry = expected.next(); + + assertEquals(entry.getKey(), wrapper.next()); + assertEquals(entry.getValue().getKey().longValue(), wrapper.weight()); + assertEquals(entry.getValue().getValue(), wrapper.payload()); + } + assertNull(wrapper.next()); + + // test the unsorted iterator wrapper with payloads + wrapper = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted)); + TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>(); + BytesRef key; + while ((key = wrapper.next()) != null) { + long value = wrapper.weight(); + BytesRef payload = wrapper.payload(); + actual.put(BytesRef.deepCopyOf(key), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload))); + } + assertEquals(sorted, actual); + + // test the sorted iterator wrapper without payloads + TermFreqPayloadIterator wrapperWithoutPayload = new SortedTermFreqPayloadIteratorWrapper(new 
TermFreqPayloadArrayIterator(unsortedWithoutPayload), comparator); + Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator(); + while (expectedWithoutPayload.hasNext()) { + Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next(); + + assertEquals(entry.getKey(), wrapperWithoutPayload.next()); + assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight()); + assertNull(wrapperWithoutPayload.payload()); + } + assertNull(wrapperWithoutPayload.next()); + + // test the unsorted iterator wrapper without payloads + wrapperWithoutPayload = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload)); + TreeMap<BytesRef, Long> actualWithoutPayload = new TreeMap<>(); + while ((key = wrapperWithoutPayload.next()) != null) { + long value = wrapperWithoutPayload.weight(); + assertNull(wrapperWithoutPayload.payload()); + actualWithoutPayload.put(BytesRef.deepCopyOf(key), value); + } + assertEquals(sortedWithoutPayload, actualWithoutPayload); + } + + public static long asLong(BytesRef b) { + return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b, + b.offset + 4) & 0xFFFFFFFFL); + } + + private static int asIntInternal(BytesRef b, int pos) { + return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16) + | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF); + } +} diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java index 995f60daa31..f367f363fc2 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java @@ -52,8 +52,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import 
org.apache.lucene.document.Document; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreq; -import org.apache.lucene.search.suggest.TermFreqArrayIterator; import org.apache.lucene.search.suggest.TermFreqPayload; import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; import org.apache.lucene.util.BytesRef; @@ -65,18 +63,18 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { /** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */ public void testKeyword() throws Exception { - Iterable keys = shuffle( - new TermFreq("foo", 50), - new TermFreq("bar", 10), - new TermFreq("barbar", 10), - new TermFreq("barbar", 12), - new TermFreq("barbara", 6), - new TermFreq("bar", 5), - new TermFreq("barbara", 1) + Iterable keys = shuffle( + new TermFreqPayload("foo", 50), + new TermFreqPayload("bar", 10), + new TermFreqPayload("barbar", 10), + new TermFreqPayload("barbar", 12), + new TermFreqPayload("barbara", 6), + new TermFreqPayload("bar", 5), + new TermFreqPayload("barbara", 1) ); AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); // top N of 2, but only foo is available List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2); @@ -165,14 +163,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testRandomRealisticKeys() throws IOException { LineFileDocs lineFile = new LineFileDocs(random()); Map mapping = new HashMap<>(); - List keys = new ArrayList<>(); + List keys = new ArrayList<>(); int howMany = atLeast(100); // this might bring up duplicates for (int i = 0; i < howMany; i++) { Document nextDoc = lineFile.nextDoc(); String title = nextDoc.getField("title").stringValue(); int randomWeight = random().nextInt(100); - keys.add(new 
TermFreq(title, randomWeight)); + keys.add(new TermFreqPayload(title, randomWeight)); if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) { mapping.put(title, Long.valueOf(randomWeight)); } @@ -183,15 +181,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { boolean doPayloads = random().nextBoolean(); if (doPayloads) { List keysAndPayloads = new ArrayList<>(); - for (TermFreq termFreq : keys) { + for (TermFreqPayload termFreq : keys) { keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v)))); } analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads)); } else { - analyzingSuggester.build(new TermFreqArrayIterator(keys)); + analyzingSuggester.build(new TermFreqPayloadArrayIterator(keys)); } - for (TermFreq termFreq : keys) { + for (TermFreqPayload termFreq : keys) { List lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size()); for (LookupResult lookupResult : lookup) { assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value)); @@ -211,14 +209,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { * basic "standardanalyzer" test with stopword removal */ public void testStandard() throws Exception { - TermFreq keys[] = new TermFreq[] { - new TermFreq("the ghost of christmas past", 50), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("the ghost of christmas past", 50), }; Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); AnalyzingSuggester suggester = new AnalyzingSuggester(standard); suggester.setPreservePositionIncrements(false); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1); assertEquals(1, results.size()); @@ -241,23 +239,23 @@ public class 
AnalyzingSuggesterTest extends LuceneTestCase { public void testEmpty() throws Exception { Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); AnalyzingSuggester suggester = new AnalyzingSuggester(standard); - suggester.build(new TermFreqArrayIterator(new TermFreq[0])); + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); } public void testNoSeps() throws Exception { - TermFreq[] keys = new TermFreq[] { - new TermFreq("ab cd", 0), - new TermFreq("abcd", 1), + TermFreqPayload[] keys = new TermFreqPayload[] { + new TermFreqPayload("ab cd", 0), + new TermFreqPayload("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, @@ -318,13 +316,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { } }; - TermFreq keys[] = new TermFreq[] { - new TermFreq("wifi network is slow", 50), - new TermFreq("wi fi network is fast", 10), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("wifi network is slow", 50), + new TermFreqPayload("wi fi network is fast", 10), }; //AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1); AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("wifi network", false, 10); if (VERBOSE) { System.out.println("Results: " + results); @@ -384,12 +382,12 @@ public class AnalyzingSuggesterTest extends 
LuceneTestCase { } }; - TermFreq keys[] = new TermFreq[] { - new TermFreq("ab xc", 50), - new TermFreq("ba xd", 50), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("ab xc", 50), + new TermFreqPayload("ba xd", 50), }; AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("ab x", false, 1); assertTrue(results.size() == 1); } @@ -462,11 +460,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 1), - new TermFreq("x y z", 3), - new TermFreq("x", 2), - new TermFreq("z z z", 20), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 1), + new TermFreqPayload("x y z", 3), + new TermFreqPayload("x", 2), + new TermFreqPayload("z z z", 20), })); //System.out.println("ALL: " + suggester.lookup("x y", false, 6)); @@ -502,11 +500,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 1), - new TermFreq("x y z", 3), - new TermFreq("x", 2), - new TermFreq("z z z", 20), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 1), + new TermFreqPayload("x y z", 3), + new TermFreqPayload("x", 2), + new TermFreqPayload("z z z", 20), })); for(int topN=1;topN<6;topN++) { @@ -657,12 +655,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { boolean doPayloads = random().nextBoolean(); - TermFreq[] keys = null; 
+ TermFreqPayload[] keys = null; TermFreqPayload[] payloadKeys = null; if (doPayloads) { payloadKeys = new TermFreqPayload[numQueries]; } else { - keys = new TermFreq[numQueries]; + keys = new TermFreqPayload[numQueries]; } boolean preserveSep = random().nextBoolean(); @@ -735,7 +733,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { payload = new BytesRef(bytes); payloadKeys[i] = new TermFreqPayload(key, weight, payload); } else { - keys[i] = new TermFreq(key, weight); + keys[i] = new TermFreqPayload(key, weight); payload = null; } @@ -758,7 +756,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { if (doPayloads) { suggester.build(new TermFreqPayloadArrayIterator(shuffle(payloadKeys))); } else { - suggester.build(new TermFreqArrayIterator(shuffle(keys))); + suggester.build(new TermFreqPayloadArrayIterator(shuffle(keys))); } for (String prefix : allPrefixes) { @@ -876,8 +874,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1); - suggester.build(new TermFreqArrayIterator(shuffle(new TermFreq("a", 40), - new TermFreq("a ", 50), new TermFreq(" a", 60)))); + suggester.build(new TermFreqPayloadArrayIterator(shuffle(new TermFreqPayload("a", 40), + new TermFreqPayload("a ", 50), new TermFreqPayload(" a", 60)))); List results = suggester.lookup("a", false, 5); assertEquals(2, results.size()); @@ -891,11 +889,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a", 2), - new TermFreq("a b c", 3), - new TermFreq("a c a", 1), - new TermFreq("a c b", 1), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + 
new TermFreqPayload("a", 2), + new TermFreqPayload("a b c", 3), + new TermFreqPayload("a c a", 1), + new TermFreqPayload("a c b", 1), })); suggester.lookup("a", false, 4); @@ -907,10 +905,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a", 5), - new TermFreq("a b", 3), - new TermFreq("a c", 4), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("a", 5), + new TermFreqPayload("a b", 3), + new TermFreqPayload("a c", 4), })); List results = suggester.lookup("a", false, 3); @@ -972,9 +970,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqArrayIterator(shuffle( - new TermFreq("hambone", 6), - new TermFreq("nellie", 5)))); + suggester.build(new TermFreqPayloadArrayIterator(shuffle( + new TermFreqPayload("hambone", 6), + new TermFreqPayload("nellie", 5)))); List results = suggester.lookup("nellie", false, 2); assertEquals(2, results.size()); @@ -1041,9 +1039,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a", 6), - new TermFreq("b", 5), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("a", 6), + new TermFreqPayload("b", 5), })); List results = suggester.lookup("a", false, 2); @@ -1114,21 +1112,21 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a a", 50), - new TermFreq("a b", 50), + suggester.build(new TermFreqPayloadArrayIterator(new 
TermFreqPayload[] { + new TermFreqPayload("a a", 50), + new TermFreqPayload("a b", 50), })); } public void testDupSurfaceFormsMissingResults3() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a a", 7), - new TermFreq("a a", 7), - new TermFreq("a c", 6), - new TermFreq("a c", 3), - new TermFreq("a b", 5), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("a a", 7), + new TermFreqPayload("a a", 7), + new TermFreqPayload("a c", 6), + new TermFreqPayload("a c", 3), + new TermFreqPayload("a b", 5), })); assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString()); } @@ -1136,9 +1134,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testEndingSpace() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("i love lucy", 7), - new TermFreq("isla de muerta", 8), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("i love lucy", 7), + new TermFreqPayload("isla de muerta", 8), })); assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString()); assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString()); @@ -1169,15 +1167,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { }; AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] {new TermFreq("a", 1)})); + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {new TermFreqPayload("a", 1)})); assertEquals("[a/1]", suggester.lookup("a", false, 1).toString()); } 
public void testIllegalLookupArgument() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("а где Люси?", 7), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("а где Люси?", 7), })); try { suggester.lookup("а\u001E", false, 3); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java index 06556f5c823..e65f2bc192a 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java @@ -41,8 +41,8 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreq; -import org.apache.lucene.search.suggest.TermFreqArrayIterator; +import org.apache.lucene.search.suggest.TermFreqPayload; +import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LuceneTestCase; @@ -54,16 +54,16 @@ import org.apache.lucene.util.fst.Util; public class FuzzySuggesterTest extends LuceneTestCase { public void testRandomEdits() throws IOException { - List keys = new ArrayList(); + List keys = new ArrayList(); int numTerms = atLeast(100); for (int i = 0; i < numTerms; i++) { - keys.add(new TermFreq("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); + keys.add(new TermFreqPayload("boo" + _TestUtil.randomSimpleString(random()), 1 + 
random().nextInt(100))); } - keys.add(new TermFreq("foo bar boo far", 12)); + keys.add(new TermFreqPayload("foo bar boo far", 12)); MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); int numIters = atLeast(10); for (int i = 0; i < numIters; i++) { String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX); @@ -75,16 +75,16 @@ public class FuzzySuggesterTest extends LuceneTestCase { } public void testNonLatinRandomEdits() throws IOException { - List keys = new ArrayList(); + List keys = new ArrayList(); int numTerms = atLeast(100); for (int i = 0; i < numTerms; i++) { - keys.add(new TermFreq("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); + keys.add(new TermFreqPayload("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); } - keys.add(new TermFreq("фуу бар буу фар", 12)); + keys.add(new TermFreqPayload("фуу бар буу фар", 12)); MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); int numIters = atLeast(10); for (int i = 0; i < numIters; i++) { String addRandomEdit = addRandomEdit("фуу бар буу", 0); @@ -97,15 +97,15 @@ public class FuzzySuggesterTest extends 
LuceneTestCase { /** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */ public void testKeyword() throws Exception { - TermFreq keys[] = new TermFreq[] { - new TermFreq("foo", 50), - new TermFreq("bar", 10), - new TermFreq("barbar", 12), - new TermFreq("barbara", 6) + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("foo", 50), + new TermFreqPayload("bar", 10), + new TermFreqPayload("barbar", 12), + new TermFreqPayload("barbara", 6) }; FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("bariar", random()), false, 2); assertEquals(2, results.size()); @@ -172,14 +172,14 @@ public class FuzzySuggesterTest extends LuceneTestCase { * basic "standardanalyzer" test with stopword removal */ public void testStandard() throws Exception { - TermFreq keys[] = new TermFreq[] { - new TermFreq("the ghost of christmas past", 50), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("the ghost of christmas past", 50), }; Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); FuzzySuggester suggester = new FuzzySuggester(standard); suggester.setPreservePositionIncrements(false); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1); assertEquals(1, results.size()); @@ -200,16 +200,16 @@ public class FuzzySuggesterTest extends LuceneTestCase { } public void testNoSeps() throws Exception { - TermFreq[] keys = new TermFreq[] { - new TermFreq("ab cd", 0), - new TermFreq("abcd", 1), + TermFreqPayload[] keys = new TermFreqPayload[] { + new TermFreqPayload("ab cd", 0), + new 
TermFreqPayload("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, @@ -270,12 +270,12 @@ public class FuzzySuggesterTest extends LuceneTestCase { } }; - TermFreq keys[] = new TermFreq[] { - new TermFreq("wifi network is slow", 50), - new TermFreq("wi fi network is fast", 10), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("wifi network is slow", 50), + new TermFreqPayload("wi fi network is fast", 10), }; FuzzySuggester suggester = new FuzzySuggester(analyzer); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("wifi network", false, 10); if (VERBOSE) { @@ -290,7 +290,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { public void testEmpty() throws Exception { FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqArrayIterator(new TermFreq[0])); + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); @@ -344,12 +344,12 @@ public class FuzzySuggesterTest extends LuceneTestCase { } }; - TermFreq keys[] = new TermFreq[] { - new TermFreq("ab xc", 50), - new TermFreq("ba xd", 50), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("ab xc", 50), + new TermFreqPayload("ba xd", 50), }; FuzzySuggester suggester = new FuzzySuggester(analyzer); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = 
suggester.lookup("ab x", false, 1); assertTrue(results.size() == 1); } @@ -418,11 +418,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 1), - new TermFreq("x y z", 3), - new TermFreq("x", 2), - new TermFreq("z z z", 20), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 1), + new TermFreqPayload("x y z", 3), + new TermFreqPayload("x", 2), + new TermFreqPayload("z z z", 20), })); //System.out.println("ALL: " + suggester.lookup("x y", false, 6)); @@ -458,11 +458,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 1), - new TermFreq("x y z", 3), - new TermFreq("x", 2), - new TermFreq("z z z", 20), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 1), + new TermFreqPayload("x y z", 3), + new TermFreqPayload("x", 2), + new TermFreqPayload("z z z", 20), })); for(int topN=1;topN<6;topN++) { @@ -491,19 +491,19 @@ public class FuzzySuggesterTest extends LuceneTestCase { } // Holds surface form separately: - private static class TermFreq2 implements Comparable { + private static class TermFreqPayload2 implements Comparable { public final String surfaceForm; public final String analyzedForm; public final long weight; - public TermFreq2(String surfaceForm, String analyzedForm, long weight) { + public TermFreqPayload2(String surfaceForm, String analyzedForm, long weight) { this.surfaceForm = surfaceForm; this.analyzedForm = analyzedForm; this.weight = 
weight; } @Override - public int compareTo(TermFreq2 other) { + public int compareTo(TermFreqPayload2 other) { int cmp = analyzedForm.compareTo(other.analyzedForm); if (cmp != 0) { return cmp; @@ -596,11 +596,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { int numQueries = atLeast(100); - final List slowCompletor = new ArrayList(); + final List slowCompletor = new ArrayList(); final TreeSet allPrefixes = new TreeSet(); final Set seen = new HashSet(); - TermFreq[] keys = new TermFreq[numQueries]; + TermFreqPayload[] keys = new TermFreqPayload[numQueries]; boolean preserveSep = random().nextBoolean(); boolean unicodeAware = random().nextBoolean(); @@ -666,17 +666,17 @@ public class FuzzySuggesterTest extends LuceneTestCase { } // we can probably do Integer.MAX_VALUE here, but why worry. int weight = random().nextInt(1<<24); - keys[i] = new TermFreq(key, weight); + keys[i] = new TermFreqPayload(key, weight); - slowCompletor.add(new TermFreq2(key, analyzedKey, weight)); + slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight)); } if (VERBOSE) { // Don't just sort original list, to avoid VERBOSE // altering the test: - List sorted = new ArrayList(slowCompletor); + List sorted = new ArrayList(slowCompletor); Collections.sort(sorted); - for(TermFreq2 ent : sorted) { + for(TermFreqPayload2 ent : sorted) { System.out.println(" surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight); } } @@ -684,7 +684,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles); FuzzySuggester suggester = new FuzzySuggester(a, a, preserveSep ? 
AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, 1, false, 1, 3, unicodeAware); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); for (String prefix : allPrefixes) { @@ -756,7 +756,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { assertTrue(automaton.isDeterministic()); // TODO: could be faster... but its slowCompletor for a reason BytesRef spare = new BytesRef(); - for (TermFreq2 e : slowCompletor) { + for (TermFreqPayload2 e : slowCompletor) { spare.copyChars(e.analyzedForm); Set finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton); for (IntsRef intsRef : finiteStrings) { @@ -825,14 +825,14 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, 1, true, 1, 3, false); - List keys = Arrays.asList(new TermFreq[] { - new TermFreq("a", 40), - new TermFreq("a ", 50), - new TermFreq(" a", 60), + List keys = Arrays.asList(new TermFreqPayload[] { + new TermFreqPayload("a", 40), + new TermFreqPayload("a ", 50), + new TermFreqPayload(" a", 60), }); Collections.shuffle(keys, random()); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("a", false, 5); assertEquals(2, results.size()); @@ -846,15 +846,15 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, 2, true, 1, 3, false); - List keys = Arrays.asList(new TermFreq[] { - new TermFreq("foo bar", 40), - new TermFreq("foo bar baz", 50), - new TermFreq("barbaz", 60), - new TermFreq("barbazfoo", 10), + List keys = Arrays.asList(new TermFreqPayload[] { + new TermFreqPayload("foo bar", 40), + new TermFreqPayload("foo bar baz", 50), + new TermFreqPayload("barbaz", 60), + new TermFreqPayload("barbazfoo", 10), }); 
Collections.shuffle(keys, random()); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString()); assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString()); @@ -929,25 +929,25 @@ public class FuzzySuggesterTest extends LuceneTestCase { public void testRandom2() throws Throwable { final int NUM = atLeast(200); - final List answers = new ArrayList(); + final List answers = new ArrayList(); final Set seen = new HashSet(); for(int i=0;i() { + Collections.sort(answers, new Comparator() { @Override - public int compare(TermFreq a, TermFreq b) { + public int compare(TermFreqPayload a, TermFreqPayload b) { return a.term.compareTo(b.term); } }); if (VERBOSE) { System.out.println("\nTEST: targets"); - for(TermFreq tf : answers) { + for(TermFreqPayload tf : answers) { System.out.println(" " + tf.term.utf8ToString() + " freq=" + tf.v); } } @@ -965,7 +965,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { } Collections.shuffle(answers, random()); - suggest.build(new TermFreqArrayIterator(answers.toArray(new TermFreq[answers.size()]))); + suggest.build(new TermFreqPayloadArrayIterator(answers.toArray(new TermFreqPayload[answers.size()]))); final int ITERS = atLeast(100); for(int iter=0;iter slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List answers, String frag) { + private List slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List answers, String frag) { final List results = new ArrayList(); final int fragLen = frag.length(); - for(TermFreq tf : answers) { + for(TermFreqPayload tf : answers) { //System.out.println(" check s=" + tf.term.utf8ToString()); boolean prefixMatches = true; for(int i=0;i keys = shuffle( - new TermFreq("foo bar baz blah", 50), - new TermFreq("boo foo bar foo bee", 20) + Iterable keys = shuffle( + new TermFreqPayload("foo bar 
baz blah", 50), + new TermFreqPayload("boo foo bar foo bee", 20) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); for(int i=0;i<2;i++) { @@ -101,12 +101,12 @@ public class TestFreeTextSuggester extends LuceneTestCase { public void testIllegalByteDuringBuild() throws Exception { // Default separator is INFORMATION SEPARATOR TWO // (0x1e), so no input token is allowed to contain it - Iterable keys = shuffle( - new TermFreq("foo\u001ebar baz", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo\u001ebar baz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); try { - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); fail("did not hit expected exception"); } catch (IllegalArgumentException iae) { // expected @@ -116,11 +116,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { public void testIllegalByteDuringQuery() throws Exception { // Default separator is INFORMATION SEPARATOR TWO // (0x1e), so no input token is allowed to contain it - Iterable keys = shuffle( - new TermFreq("foo bar baz", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo bar baz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); try { sug.lookup("foo\u001eb", 10); @@ -136,7 +136,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Skip header: lfd.nextDoc(); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); - sug.build(new TermFreqIterator() { + sug.build(new TermFreqPayloadIterator() { private int count; @@ -161,6 +161,16 @@ public class TestFreeTextSuggester extends LuceneTestCase { } return new BytesRef(doc.get("body")); } + + @Override + public BytesRef payload() 
{ + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } }); if (VERBOSE) { System.out.println(sug.sizeInBytes() + " bytes"); @@ -175,13 +185,13 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Make sure you can suggest based only on unigram model: public void testUnigrams() throws Exception { - Iterable keys = shuffle( - new TermFreq("foo bar baz blah boo foo bar foo bee", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo bar baz blah boo foo bar foo bee", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 1, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); // Sorts first by count, descending, second by term, ascending assertEquals("bar/0.22 baz/0.11 bee/0.11 blah/0.11 boo/0.11", toString(sug.lookup("b", 10))); @@ -189,24 +199,24 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Make sure the last token is not duplicated public void testNoDupsAcrossGrams() throws Exception { - Iterable keys = shuffle( - new TermFreq("foo bar bar bar bar", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo bar bar bar bar", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); assertEquals("foo bar/1.00", toString(sug.lookup("foo b", 10))); } // Lookup of just empty string produces unicode only matches: public void testEmptyString() throws Exception { - Iterable keys = shuffle( - new TermFreq("foo bar bar bar bar", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo bar bar bar bar", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); try { sug.lookup("", 10); 
fail("did not hit exception"); @@ -228,11 +238,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { } }; - Iterable keys = shuffle( - new TermFreq("wizard of oz", 50) + Iterable keys = shuffle( + new TermFreqPayload("wizard of oz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); assertEquals("wizard _ oz/1.00", toString(sug.lookup("wizard of", 10))); @@ -256,11 +266,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { } }; - Iterable keys = shuffle( - new TermFreq("wizard of of oz", 50) + Iterable keys = shuffle( + new TermFreqPayload("wizard of of oz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); assertEquals("", toString(sug.lookup("wizard of of", 10))); } @@ -320,7 +330,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Build suggester model: FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte) 0x20); - sug.build(new TermFreqIterator() { + sug.build(new TermFreqPayloadIterator() { int upto; @Override @@ -342,6 +352,16 @@ public class TestFreeTextSuggester extends LuceneTestCase { public long weight() { return random().nextLong(); } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } }); // Build inefficient but hopefully correct model: diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java index ff835bd4efa..42594e76f38 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java @@ -28,8 +28,8 @@ import org.apache.lucene.util.*; 
* Unit tests for {@link FSTCompletion}. */ public class FSTCompletionTest extends LuceneTestCase { - public static TermFreq tf(String t, int v) { - return new TermFreq(t, v); + public static TermFreqPayload tf(String t, int v) { + return new TermFreqPayload(t, v); } private FSTCompletion completion; @@ -40,15 +40,15 @@ public class FSTCompletionTest extends LuceneTestCase { super.setUp(); FSTCompletionBuilder builder = new FSTCompletionBuilder(); - for (TermFreq tf : evalKeys()) { + for (TermFreqPayload tf : evalKeys()) { builder.add(tf.term, (int) tf.v); } completion = builder.build(); completionAlphabetical = new FSTCompletion(completion.getFST(), false, true); } - private TermFreq[] evalKeys() { - final TermFreq[] keys = new TermFreq[] { + private TermFreqPayload[] evalKeys() { + final TermFreqPayload[] keys = new TermFreqPayload[] { tf("one", 0), tf("oneness", 1), tf("onerous", 1), @@ -157,17 +157,17 @@ public class FSTCompletionTest extends LuceneTestCase { FSTCompletionLookup lookup = new FSTCompletionLookup(10, true); Random r = random(); - List keys = new ArrayList(); + List keys = new ArrayList(); for (int i = 0; i < 5000; i++) { - keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1)); + keys.add(new TermFreqPayload(_TestUtil.randomSimpleString(r), -1)); } - lookup.build(new TermFreqArrayIterator(keys)); + lookup.build(new TermFreqPayloadArrayIterator(keys)); // All the weights were constant, so all returned buckets must be constant, whatever they // are. 
Long previous = null; - for (TermFreq tf : keys) { + for (TermFreqPayload tf : keys) { Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue(); if (previous != null) { assertEquals(previous, current); @@ -177,11 +177,11 @@ public class FSTCompletionTest extends LuceneTestCase { } public void testMultilingualInput() throws Exception { - List input = LookupBenchmarkTest.readTop50KWiki(); + List input = LookupBenchmarkTest.readTop50KWiki(); FSTCompletionLookup lookup = new FSTCompletionLookup(); - lookup.build(new TermFreqArrayIterator(input)); - for (TermFreq tf : input) { + lookup.build(new TermFreqPayloadArrayIterator(input)); + for (TermFreqPayload tf : input) { assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))); assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString()); } @@ -198,17 +198,17 @@ public class FSTCompletionTest extends LuceneTestCase { } public void testRandom() throws Exception { - List freqs = new ArrayList(); + List freqs = new ArrayList(); Random rnd = random(); for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) { int weight = rnd.nextInt(100); - freqs.add(new TermFreq("" + rnd.nextLong(), weight)); + freqs.add(new TermFreqPayload("" + rnd.nextLong(), weight)); } FSTCompletionLookup lookup = new FSTCompletionLookup(); - lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()]))); + lookup.build(new TermFreqPayloadArrayIterator(freqs.toArray(new TermFreqPayload[freqs.size()]))); - for (TermFreq tf : freqs) { + for (TermFreqPayload tf : freqs) { final String term = tf.term.utf8ToString(); for (int i = 1; i < term.length(); i++) { String prefix = term.substring(0, i); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java 
index 3d96ba7c35a..2fe2c7a4d85 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java @@ -20,8 +20,8 @@ package org.apache.lucene.search.suggest.fst; import java.util.*; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreq; -import org.apache.lucene.search.suggest.TermFreqArrayIterator; +import org.apache.lucene.search.suggest.TermFreqPayload; +import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -29,16 +29,16 @@ import org.apache.lucene.util._TestUtil; public class WFSTCompletionTest extends LuceneTestCase { public void testBasic() throws Exception { - TermFreq keys[] = new TermFreq[] { - new TermFreq("foo", 50), - new TermFreq("bar", 10), - new TermFreq("barbar", 12), - new TermFreq("barbara", 6) + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("foo", 50), + new TermFreqPayload("bar", 10), + new TermFreqPayload("barbar", 12), + new TermFreqPayload("barbara", 6) }; Random random = new Random(random().nextLong()); WFSTCompletionLookup suggester = new WFSTCompletionLookup(); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); // top N of 2, but only foo is available List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2); @@ -81,9 +81,9 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(true); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 20), - new TermFreq("x", 2), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 20), + new TermFreqPayload("x", 2), })); for(int 
topN=1;topN<4;topN++) { @@ -105,9 +105,9 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 20), - new TermFreq("x", 2), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 20), + new TermFreqPayload("x", 2), })); for(int topN=1;topN<4;topN++) { @@ -131,7 +131,7 @@ public class WFSTCompletionTest extends LuceneTestCase { final TreeMap slowCompletor = new TreeMap(); final TreeSet allPrefixes = new TreeSet(); - TermFreq[] keys = new TermFreq[numWords]; + TermFreqPayload[] keys = new TermFreqPayload[numWords]; for (int i = 0; i < numWords; i++) { String s; @@ -150,11 +150,11 @@ public class WFSTCompletionTest extends LuceneTestCase { // we can probably do Integer.MAX_VALUE here, but why worry. int weight = random().nextInt(1<<24); slowCompletor.put(s, (long)weight); - keys[i] = new TermFreq(s, weight); + keys[i] = new TermFreqPayload(s, weight); } WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); Random random = new Random(random().nextLong()); for (String prefix : allPrefixes) { @@ -205,16 +205,16 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq(key1, 50), - new TermFreq(key2, 50), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload(key1, 50), + new TermFreqPayload(key2, 50), })); } public void testEmpty() throws Exception { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqArrayIterator(new TermFreq[0])); + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); List result = 
suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); }