LUCENE-5260: cutover all suggesters to TermFreqPayloadIterator

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1531664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-10-13 10:26:22 +00:00
parent 544d997664
commit c1be5650a9
30 changed files with 638 additions and 577 deletions

View File

@ -150,6 +150,10 @@ API Changes:
numBits parameter to allow growing/shrinking the copied bitset. You can
use FixedBitSet.clone() if you only need to clone the bitset. (Shai Erera)
* LUCENE-5260: Use TermFreqPayloadIterator for all suggesters; those
suggesters that can't support payloads will throw an exception if
hasPayloads() is true. (Areek Zillur via Mike McCandless)
Optimizations
* LUCENE-5225: The ToParentBlockJoinQuery only keeps track of the child

View File

@ -59,7 +59,7 @@ public class HighFrequencyDictionary implements Dictionary {
return new HighFrequencyIterator();
}
final class HighFrequencyIterator implements TermFreqIterator {
final class HighFrequencyIterator implements TermFreqPayloadIterator {
private final BytesRef spare = new BytesRef();
private final TermsEnum termsEnum;
private int minNumDocs;
@ -98,5 +98,15 @@ public class HighFrequencyDictionary implements Dictionary {
}
return null;
}
/** Always returns null: this iterator does not supply payloads (see {@link #hasPayloads()}). */
@Override
public BytesRef payload() {
return null;
}
/** Always returns false, matching {@link #payload()}, which always returns null. */
@Override
public boolean hasPayloads() {
return false;
}
}
}

View File

@ -1,58 +0,0 @@
package org.apache.lucene.search.spell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
/**
 * Interface for enumerating term,weight pairs.
 */
public interface TermFreqIterator extends BytesRefIterator {

  /** A term's weight, higher numbers mean better suggestions. */
  public long weight();

  /**
   * Wraps a BytesRefIterator as a TermFreqIterator, with all weights
   * set to <code>1</code>.
   */
  public static class TermFreqIteratorWrapper implements TermFreqIterator {
    // Assigned once in the constructor and never rebound, hence final.
    private final BytesRefIterator wrapped;

    /**
     * Creates a new wrapper, wrapping the specified iterator and
     * specifying a weight value of <code>1</code> for all terms.
     *
     * @param wrapped the iterator whose terms are returned by {@link #next()}
     */
    public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
      this.wrapped = wrapped;
    }

    /** Returns the constant weight <code>1</code> for every term. */
    @Override
    public long weight() {
      return 1;
    }

    /** Delegates to the wrapped iterator's <code>next()</code>. */
    @Override
    public BytesRef next() throws IOException {
      return wrapped.next();
    }
  }
}

View File

@ -17,20 +17,67 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
/**
* Interface for enumerating term,weight,payload triples;
* currently only {@link AnalyzingSuggester} and {@link
* FuzzySuggester} support payloads.
* currently only {@link AnalyzingSuggester}, {@link
* FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads.
*/
public interface TermFreqPayloadIterator extends TermFreqIterator {
public interface TermFreqPayloadIterator extends BytesRefIterator {
/** A term's weight, higher numbers mean better suggestions. */
public long weight();
/** An arbitrary byte[] to record per suggestion. See
* {@link LookupResult#payload} to retrieve the payload
* for each suggestion. */
public BytesRef payload();
/** Returns true if the iterator has payloads */
public boolean hasPayloads();
/**
 * Adapts a plain BytesRefIterator to the TermFreqPayloadIterator contract:
 * every term is reported with a weight of <code>1</code> and without a payload.
 */
public static class TermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {

  /** The iterator that actually produces the terms. */
  private final BytesRefIterator delegate;

  /**
   * Creates a new wrapper around the specified iterator. All terms
   * get a weight of <code>1</code> and a null payload.
   */
  public TermFreqPayloadIteratorWrapper(BytesRefIterator wrapped) {
    delegate = wrapped;
  }

  /** Hands back whatever the underlying iterator returns next. */
  @Override
  public BytesRef next() throws IOException {
    return delegate.next();
  }

  /** Every term carries the same weight. */
  @Override
  public long weight() {
    return 1;
  }

  /** This wrapper never reports payloads. */
  @Override
  public boolean hasPayloads() {
    return false;
  }

  /** No payload is associated with any term. */
  @Override
  public BytesRef payload() {
    return null;
  }
}
}

View File

@ -18,7 +18,8 @@ package org.apache.lucene.search.suggest;
*/
import java.io.IOException;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
@ -27,22 +28,30 @@ import org.apache.lucene.util.Counter;
* This wrapper buffers incoming elements.
* @lucene.experimental
*/
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
public class BufferingTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
// TODO keep this for now
/** buffered term entries */
protected BytesRefArray entries = new BytesRefArray(Counter.newCounter());
/** buffered payload entries */
protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter());
/** current buffer position */
protected int curPos = -1;
/** buffered weights, parallel with {@link #entries} */
protected long[] freqs = new long[1];
private final BytesRef spare = new BytesRef();
private final BytesRef payloadSpare = new BytesRef();
private final boolean hasPayloads;
/** Creates a new iterator, buffering entries from the specified iterator */
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
public BufferingTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
BytesRef spare;
int freqIndex = 0;
hasPayloads = source.hasPayloads();
while((spare = source.next()) != null) {
entries.append(spare);
if (hasPayloads) {
payloads.append(source.payload());
}
if (freqIndex >= freqs.length) {
freqs = ArrayUtil.grow(freqs, freqs.length+1);
}
@ -64,4 +73,17 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
}
return null;
}
/**
 * Returns the buffered payload at the current buffer position, or null
 * when no payloads were buffered or the position is past the buffered payloads.
 */
@Override
public BytesRef payload() {
  if (!hasPayloads || curPos >= payloads.size()) {
    return null;
  }
  return payloads.get(payloadSpare, curPos);
}
/** Returns the flag captured from the source iterator's hasPayloads() when the entries were buffered. */
@Override
public boolean hasPayloads() {
return hasPayloads;
}
}

View File

@ -47,12 +47,6 @@ import org.apache.lucene.util.BytesRefIterator;
* The term, weight and (optionally) payload fields supplied
* are required for ALL documents and have to be stored
* </li>
* <li>
* This Dictionary implementation is not compatible with the following Suggesters:
* {@link JaspellLookup}, {@link TSTLookup}, {@link FSTCompletionLookup},
* {@link WFSTCompletionLookup} and {@link AnalyzingInfixSuggester}.
* see https://issues.apache.org/jira/browse/LUCENE-5260
* </li>
* </ul>
*/
public class DocumentDictionary implements Dictionary {
@ -95,7 +89,7 @@ public class DocumentDictionary implements Dictionary {
final class TermWeightPayloadIterator implements TermFreqPayloadIterator {
private final int docCount;
private final Set<String> relevantFields;
private final boolean withPayload;
private final boolean hasPayloads;
private final Bits liveDocs;
private int currentDocId = -1;
private long currentWeight;
@ -106,13 +100,13 @@ public class DocumentDictionary implements Dictionary {
* index. Setting <code>hasPayloads</code> to false implies an iterator
* over only term and weight.
*/
public TermWeightPayloadIterator(boolean withPayload) throws IOException {
public TermWeightPayloadIterator(boolean hasPayloads) throws IOException {
docCount = reader.maxDoc() - 1;
this.withPayload = withPayload;
this.hasPayloads = hasPayloads;
currentPayload = null;
liveDocs = MultiFields.getLiveDocs(reader);
List<String> relevantFieldList;
if(withPayload) {
if(hasPayloads) {
relevantFieldList = Arrays.asList(field, weightField, payloadField);
} else {
relevantFieldList = Arrays.asList(field, weightField);
@ -135,7 +129,7 @@ public class DocumentDictionary implements Dictionary {
StoredDocument doc = reader.document(currentDocId, relevantFields);
if (withPayload) {
if (hasPayloads) {
StorableField payload = doc.getField(payloadField);
if (payload == null) {
throw new IllegalArgumentException(payloadField + " does not exist");
@ -169,6 +163,11 @@ public class DocumentDictionary implements Dictionary {
public BytesRef payload() {
return currentPayload;
}
/** Returns the hasPayloads flag this iterator was constructed with. */
@Override
public boolean hasPayloads() {
return hasPayloads;
}
}
}

View File

@ -21,7 +21,7 @@ package org.apache.lucene.search.suggest;
import java.io.*;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@ -57,11 +57,11 @@ public class FileDictionary implements Dictionary {
}
@Override
public TermFreqIterator getWordsIterator() {
public TermFreqPayloadIterator getWordsIterator() {
return new FileIterator();
}
final class FileIterator implements TermFreqIterator {
final class FileIterator implements TermFreqPayloadIterator {
private long curFreq;
private final BytesRef spare = new BytesRef();
@ -98,5 +98,15 @@ public class FileDictionary implements Dictionary {
return null;
}
}
/** Always returns null: this iterator does not supply payloads (see {@link #hasPayloads()}). */
@Override
public BytesRef payload() {
return null;
}
/** Always returns false, matching {@link #payload()}, which always returns null. */
@Override
public boolean hasPayloads() {
return false;
}
}
}

View File

@ -24,7 +24,7 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
@ -154,25 +154,25 @@ public abstract class Lookup {
/** Build lookup from a dictionary. Some implementations may require sorted
* or unsorted keys from the dictionary's iterator - use
* {@link SortedTermFreqIteratorWrapper} or
* {@link UnsortedTermFreqIteratorWrapper} in such case.
* {@link SortedTermFreqPayloadIteratorWrapper} or
* {@link UnsortedTermFreqPayloadIteratorWrapper} in such case.
*/
public void build(Dictionary dict) throws IOException {
BytesRefIterator it = dict.getWordsIterator();
TermFreqIterator tfit;
if (it instanceof TermFreqIterator) {
tfit = (TermFreqIterator)it;
TermFreqPayloadIterator tfit;
if (it instanceof TermFreqPayloadIterator) {
tfit = (TermFreqPayloadIterator)it;
} else {
tfit = new TermFreqIterator.TermFreqIteratorWrapper(it);
tfit = new TermFreqPayloadIterator.TermFreqPayloadIteratorWrapper(it);
}
build(tfit);
}
/**
* Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}.
* Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqPayloadIterator}.
* The implementation might re-sort the data internally.
*/
public abstract void build(TermFreqIterator tfit) throws IOException;
public abstract void build(TermFreqPayloadIterator tfit) throws IOException;
/**
* Look up a key and return possible completion for this key.

View File

@ -21,7 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Sort.ByteSequencesReader;
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
import org.apache.lucene.store.ByteArrayDataInput;
@ -34,23 +34,25 @@ import org.apache.lucene.util.IOUtils;
* This wrapper buffers incoming elements and makes sure they are sorted based on given comparator.
* @lucene.experimental
*/
public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
public class SortedTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
private final TermFreqIterator source;
private final TermFreqPayloadIterator source;
private File tempInput;
private File tempSorted;
private final ByteSequencesReader reader;
private final Comparator<BytesRef> comparator;
private final boolean hasPayloads;
private boolean done = false;
private long weight;
private final BytesRef scratch = new BytesRef();
private BytesRef payload = new BytesRef();
/**
* Creates a new sorted wrapper, using {@link
* BytesRef#getUTF8SortedAsUnicodeComparator} for
* sorting. */
public SortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
}
@ -58,7 +60,8 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
* Creates a new sorted wrapper, sorting by BytesRef
* (ascending) then cost (ascending).
*/
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source, Comparator<BytesRef> comparator) throws IOException {
this.hasPayloads = source.hasPayloads();
this.source = source;
this.comparator = comparator;
this.reader = sort();
@ -74,6 +77,9 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
ByteArrayDataInput input = new ByteArrayDataInput();
if (reader.read(scratch)) {
weight = decode(scratch, input);
if (hasPayloads) {
payload = decodePayload(scratch, input);
}
success = true;
return scratch;
}
@ -93,6 +99,19 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
return weight;
}
/**
 * Returns the payload decoded for the most recently read entry, or null
 * when this wrapper was created over a source without payloads.
 */
@Override
public BytesRef payload() {
  return hasPayloads ? payload : null;
}
/** Returns the flag captured from the source iterator's hasPayloads() in the constructor. */
@Override
public boolean hasPayloads() {
return hasPayloads;
}
/** Sorts by BytesRef (ascending) then cost (ascending). */
private final Comparator<BytesRef> tieBreakByCostComparator = new Comparator<BytesRef>() {
@ -111,6 +130,10 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
rightScratch.length = right.length;
long leftCost = decode(leftScratch, input);
long rightCost = decode(rightScratch, input);
if (hasPayloads) {
decodePayload(leftScratch, input);
decodePayload(rightScratch, input);
}
int cmp = comparator.compare(leftScratch, rightScratch);
if (cmp != 0) {
return cmp;
@ -133,7 +156,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
while ((spare = source.next()) != null) {
encode(writer, output, buffer, spare, source.weight());
encode(writer, output, buffer, spare, source.payload(), source.weight());
}
writer.close();
new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted);
@ -164,13 +187,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
}
}
/** encodes an entry (bytes+weight) to the provided writer */
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
if (spare.length + 8 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 8);
/**
 * Encodes an entry to the provided writer, laid out as: term bytes, then
 * (only if this wrapper has payloads) the payload bytes followed by the
 * payload length as a short, then the weight as a long.
 *
 * @param writer  destination the encoded bytes are written to
 * @param output  scratch output that is reset onto the buffer for each entry
 * @param buffer  scratch array; replaced locally by a larger one when the entry does not fit
 * @param spare   the term bytes
 * @param payload the payload bytes; only read when this wrapper has payloads
 * @param weight  the term's weight
 * @throws IllegalArgumentException if this wrapper has payloads and the payload
 *         is null or its length cannot be stored in a short
 */
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
  int requiredLength = spare.length + 8; // term bytes + weight (long)
  if (hasPayloads) {
    if (payload == null) {
      throw new IllegalArgumentException("payload must not be null when hasPayloads() is true");
    }
    // The length is stored as a signed short and read back with readShort();
    // a larger value would wrap in the cast below and the entry could not be decoded.
    if (payload.length > Short.MAX_VALUE) {
      throw new IllegalArgumentException("cannot handle payload > " + Short.MAX_VALUE + " in length (got " + payload.length + ")");
    }
    requiredLength += 2 + payload.length; // payload bytes + payload length (short)
  }
  if (requiredLength >= buffer.length) {
    // Only the local reference is replaced; the caller's array is untouched.
    buffer = ArrayUtil.grow(buffer, requiredLength);
  }
  output.reset(buffer);
  output.writeBytes(spare.bytes, spare.offset, spare.length);
  if (hasPayloads) {
    output.writeBytes(payload.bytes, payload.offset, payload.length);
    output.writeShort((short) payload.length);
  }
  output.writeLong(weight);
  writer.write(buffer, 0, output.getPosition());
}
@ -182,4 +210,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
scratch.length -= 8; // long
return tmpInput.readLong();
}
/**
 * Decodes the payload stored at the tail of <code>scratch</code>: reads the
 * two-byte length found at <code>scratch.length - 2</code>, copies the payload
 * bytes that precede it into a newly allocated BytesRef, and shrinks
 * <code>scratch.length</code> so it no longer covers the payload or its length.
 * Positions are computed from <code>scratch.length</code> only;
 * <code>scratch.offset</code> is not consulted.
 */
protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
  final int lengthFieldStart = scratch.length - 2; // where the short length lives
  tmpInput.reset(scratch.bytes);
  tmpInput.skipBytes(lengthFieldStart);
  final short size = tmpInput.readShort();

  final int payloadStart = lengthFieldStart - size;
  tmpInput.setPosition(payloadStart);
  final BytesRef result = new BytesRef(size);
  tmpInput.readBytes(result.bytes, 0, size);
  result.length = size;

  // drop both the payload and its trailing length field from the entry
  scratch.length = payloadStart;
  return result;
}
}

View File

@ -20,7 +20,7 @@ package org.apache.lucene.search.suggest;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
/**
@ -28,16 +28,17 @@ import org.apache.lucene.util.BytesRef;
* random order.
* @lucene.experimental
*/
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
public class UnsortedTermFreqPayloadIteratorWrapper extends BufferingTermFreqPayloadIteratorWrapper {
// TODO keep this for now
private final int[] ords;
private int currentOrd = -1;
private final BytesRef spare = new BytesRef();
private final BytesRef payloadSpare = new BytesRef();
/**
* Creates a new iterator, wrapping the specified iterator and
* returning elements in a random order.
*/
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
public UnsortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
super(source);
ords = new int[entries.size()];
Random random = new Random();
@ -54,13 +55,24 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
/** Returns the buffered weight belonging to the entry most recently selected via the shuffled ordinals. */
@Override
public long weight() {
  final int ord = currentOrd;
  assert ord == ords[curPos];
  return freqs[ord];
}
@Override
public BytesRef next() throws IOException {
if (++curPos < entries.size()) {
return entries.get(spare, (currentOrd = ords[curPos]));
currentOrd = ords[curPos];
return entries.get(spare, currentOrd);
}
return null;
}
/**
 * Returns the buffered payload for the current ordinal, or null when there
 * are no payloads or the position is past the buffered payloads.
 */
@Override
public BytesRef payload() {
  if (!hasPayloads() || curPos >= payloads.size()) {
    return null;
  }
  assert currentOrd == ords[curPos];
  return payloads.get(payloadSpare, currentOrd);
}

View File

@ -65,7 +65,6 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
import org.apache.lucene.search.suggest.Lookup;
@ -176,19 +175,14 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
}
@Override
public void build(TermFreqIterator iter) throws IOException {
public void build(TermFreqPayloadIterator iter) throws IOException {
if (searcher != null) {
searcher.getIndexReader().close();
searcher = null;
}
TermFreqPayloadIterator payloads;
if (iter instanceof TermFreqPayloadIterator) {
payloads = (TermFreqPayloadIterator) iter;
} else {
payloads = null;
}
Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));
IndexWriter w = null;
@ -236,7 +230,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
doc.add(weightField);
Field payloadField;
if (payloads != null) {
if (iter.hasPayloads()) {
payloadField = new BinaryDocValuesField("payloads", new BytesRef());
doc.add(payloadField);
} else {
@ -250,8 +244,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
textGramField.setStringValue(textString);
textDVField.setBytesValue(text);
weightField.setLongValue(iter.weight());
if (payloads != null) {
payloadField.setBytesValue(payloads.payload());
if (iter.hasPayloads()) {
payloadField.setBytesValue(iter.payload());
}
w.addDocument(doc);
}

View File

@ -31,7 +31,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort;
@ -381,19 +380,13 @@ public class AnalyzingSuggester extends Lookup {
}
@Override
public void build(TermFreqIterator iterator) throws IOException {
public void build(TermFreqPayloadIterator iterator) throws IOException {
String prefix = getClass().getSimpleName();
File directory = Sort.defaultTempDir();
File tempInput = File.createTempFile(prefix, ".input", directory);
File tempSorted = File.createTempFile(prefix, ".sorted", directory);
TermFreqPayloadIterator payloads;
if (iterator instanceof TermFreqPayloadIterator) {
payloads = (TermFreqPayloadIterator) iterator;
} else {
payloads = null;
}
hasPayloads = payloads != null;
hasPayloads = iterator.hasPayloads();
Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
Sort.ByteSequencesReader reader = null;
@ -432,7 +425,7 @@ public class AnalyzingSuggester extends Lookup {
if (surfaceForm.length > (Short.MAX_VALUE-2)) {
throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
}
payload = payloads.payload();
payload = iterator.payload();
// payload + surfaceLength (short)
requiredLength += payload.length + 2;
} else {
@ -470,7 +463,7 @@ public class AnalyzingSuggester extends Lookup {
writer.close();
// Sort all input/output pairs (required by FST.Builder):
new Sort(new AnalyzingComparator(payloads != null)).sort(tempInput, tempSorted);
new Sort(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);
// Free disk space:
tempInput.delete();

View File

@ -54,7 +54,6 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort;
@ -274,15 +273,15 @@ public class FreeTextSuggester extends Lookup {
}
@Override
public void build(TermFreqIterator iterator) throws IOException {
public void build(TermFreqPayloadIterator iterator) throws IOException {
build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
}
/** Build the suggest index, using up to the specified
* amount of temporary RAM while building. Note that
* the weights for the suggestions are ignored. */
public void build(TermFreqIterator iterator, double ramBufferSizeMB) throws IOException {
if (iterator instanceof TermFreqPayloadIterator) {
public void build(TermFreqPayloadIterator iterator, double ramBufferSizeMB) throws IOException {
if (iterator.hasPayloads()) {
throw new IllegalArgumentException("payloads are not supported");
}

View File

@ -24,7 +24,6 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort.SortInfo;
@ -43,7 +42,7 @@ import org.apache.lucene.util.fst.NoOutputs;
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
*
* <p>This adapter differs from {@link FSTCompletion} in that it attempts
* to discretize any "weights" as passed from in {@link TermFreqIterator#weight()}
* to discretize any "weights" as passed in from {@link TermFreqPayloadIterator#weight()}
* to match the number of buckets. For the rationale for bucketing, see
* {@link FSTCompletion}.
*
@ -96,7 +95,7 @@ public class FSTCompletionLookup extends Lookup {
/**
* This constructor prepares for creating a suggested FST using the
* {@link #build(TermFreqIterator)} method. The number of weight
* {@link #build(TermFreqPayloadIterator)} method. The number of weight
* discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and
* exact matches are promoted to the top of the suggestions list.
*/
@ -106,7 +105,7 @@ public class FSTCompletionLookup extends Lookup {
/**
* This constructor prepares for creating a suggested FST using the
* {@link #build(TermFreqIterator)} method.
* {@link #build(TermFreqPayloadIterator)} method.
*
* @param buckets
* The number of weight discretization buckets (see
@ -141,8 +140,8 @@ public class FSTCompletionLookup extends Lookup {
}
@Override
public void build(TermFreqIterator tfit) throws IOException {
if (tfit instanceof TermFreqPayloadIterator) {
public void build(TermFreqPayloadIterator tfit) throws IOException {
if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
File tempInput = File.createTempFile(

View File

@ -25,11 +25,10 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.InputStreamDataInput;
@ -93,12 +92,12 @@ public class WFSTCompletionLookup extends Lookup {
}
@Override
public void build(TermFreqIterator iterator) throws IOException {
if (iterator instanceof TermFreqPayloadIterator) {
public void build(TermFreqPayloadIterator iterator) throws IOException {
if (iterator.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
BytesRef scratch = new BytesRef();
TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
TermFreqPayloadIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
IntsRef scratchInts = new IntsRef();
BytesRef previous = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -255,14 +254,15 @@ public class WFSTCompletionLookup extends Lookup {
return Integer.MAX_VALUE - (int)value;
}
private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqIteratorWrapper {
private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqPayloadIteratorWrapper {
WFSTTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
WFSTTermFreqIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
super(source);
assert source.hasPayloads() == false;
}
@Override
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
if (spare.length + 4 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 4);
}

View File

@ -25,7 +25,6 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
@ -47,13 +46,13 @@ public class JaspellLookup extends Lookup {
/**
* Creates a new empty trie
* @see #build(TermFreqIterator)
* @see #build(TermFreqPayloadIterator)
* */
public JaspellLookup() {}
@Override
public void build(TermFreqIterator tfit) throws IOException {
if (tfit instanceof TermFreqPayloadIterator) {
public void build(TermFreqPayloadIterator tfit) throws IOException {
if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
trie = new JaspellTernarySearchTrie();

View File

@ -25,10 +25,9 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
@ -46,19 +45,19 @@ public class TSTLookup extends Lookup {
/**
* Creates a new TSTLookup with an empty Ternary Search Tree.
* @see #build(TermFreqIterator)
* @see #build(TermFreqPayloadIterator)
*/
public TSTLookup() {}
@Override
public void build(TermFreqIterator tfit) throws IOException {
if (tfit instanceof TermFreqPayloadIterator) {
public void build(TermFreqPayloadIterator tfit) throws IOException {
if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
root = new TernaryTreeNode();
// make sure it's sorted and the comparator uses UTF16 sort order
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
tfit = new SortedTermFreqPayloadIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
ArrayList<String> tokens = new ArrayList<String>();
ArrayList<Number> vals = new ArrayList<Number>();

View File

@ -72,12 +72,12 @@ public class LookupBenchmarkTest extends LuceneTestCase {
/**
* Input term/weight pairs.
*/
private static TermFreq [] dictionaryInput;
private static TermFreqPayload [] dictionaryInput;
/**
* Benchmark term/weight pairs (randomized order).
*/
private static List<TermFreq> benchmarkInput;
private static List<TermFreqPayload> benchmarkInput;
/**
* Loads terms and frequencies from Wikipedia (cached).
@ -85,9 +85,9 @@ public class LookupBenchmarkTest extends LuceneTestCase {
@BeforeClass
public static void setup() throws Exception {
assert false : "disable assertions before running benchmarks!";
List<TermFreq> input = readTop50KWiki();
List<TermFreqPayload> input = readTop50KWiki();
Collections.shuffle(input, random);
LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreq [input.size()]);
LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreqPayload [input.size()]);
Collections.shuffle(input, random);
LookupBenchmarkTest.benchmarkInput = input;
}
@ -97,8 +97,8 @@ public class LookupBenchmarkTest extends LuceneTestCase {
/**
* Collect the multilingual input for benchmarks/ tests.
*/
public static List<TermFreq> readTop50KWiki() throws Exception {
List<TermFreq> input = new ArrayList<TermFreq>();
public static List<TermFreqPayload> readTop50KWiki() throws Exception {
List<TermFreqPayload> input = new ArrayList<TermFreqPayload>();
URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
assert resource != null : "Resource missing: Top50KWiki.utf8";
@ -109,7 +109,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
assertTrue("No | separator?: " + line, tab >= 0);
int weight = Integer.parseInt(line.substring(tab + 1));
String key = line.substring(0, tab);
input.add(new TermFreq(key, weight));
input.add(new TermFreqPayload(key, weight));
}
br.close();
return input;
@ -163,7 +163,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
/**
* Create {@link Lookup} instance and populate it.
*/
private Lookup buildLookup(Class<? extends Lookup> cls, TermFreq[] input) throws Exception {
private Lookup buildLookup(Class<? extends Lookup> cls, TermFreqPayload[] input) throws Exception {
Lookup lookup = null;
try {
lookup = cls.newInstance();
@ -176,7 +176,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
lookup = ctor.newInstance(a);
}
}
lookup.build(new TermFreqArrayIterator(input));
lookup.build(new TermFreqPayloadArrayIterator(input));
return lookup;
}
@ -220,7 +220,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
final Lookup lookup = buildLookup(cls, dictionaryInput);
final List<String> input = new ArrayList<String>(benchmarkInput.size());
for (TermFreq tf : benchmarkInput) {
for (TermFreqPayload tf : benchmarkInput) {
String s = tf.term.utf8ToString();
String sub = s.substring(0, Math.min(s.length(),
minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1)));

View File

@ -65,10 +65,10 @@ public class PersistenceTest extends LuceneTestCase {
// Add all input keys.
Lookup lookup = lookupClass.newInstance();
TermFreq[] keys = new TermFreq[this.keys.length];
TermFreqPayload[] keys = new TermFreqPayload[this.keys.length];
for (int i = 0; i < keys.length; i++)
keys[i] = new TermFreq(this.keys[i], i);
lookup.build(new TermFreqArrayIterator(keys));
keys[i] = new TermFreqPayload(this.keys[i], i);
lookup.build(new TermFreqPayloadArrayIterator(keys));
// Store the suggester.
File storeDir = TEMP_DIR;
@ -81,7 +81,7 @@ public class PersistenceTest extends LuceneTestCase {
// Assert validity.
Random random = random();
long previous = Long.MIN_VALUE;
for (TermFreq k : keys) {
for (TermFreqPayload k : keys) {
List<LookupResult> list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1);
assertEquals(1, list.size());
LookupResult lookupResult = list.get(0);

View File

@ -1,34 +0,0 @@
package org.apache.lucene.search.suggest;
import org.apache.lucene.util.BytesRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Immutable holder pairing a term with a {@code long} value.
 * <p>
 * Both fields are public and final; the term is stored by reference as given
 * to the constructor.
 */
public final class TermFreq {
  /** The term, exactly as passed to (or built by) the constructor. */
  public final BytesRef term;

  /** The value associated with {@link #term}. */
  public final long v;

  /**
   * Creates a pair from an existing {@link BytesRef}.
   *
   * @param term the term; the reference is kept as-is
   * @param v the value associated with the term
   */
  public TermFreq(BytesRef term, long v) {
    this.v = v;
    this.term = term;
  }

  /**
   * Creates a pair from a string by wrapping it in a new {@link BytesRef}.
   *
   * @param term the term text
   * @param v the value associated with the term
   */
  public TermFreq(String term, long v) {
    this(new BytesRef(term), v);
  }
}

View File

@ -1,60 +0,0 @@
package org.apache.lucene.search.suggest;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
/**
 * A {@link TermFreqIterator} that walks a sequence of {@link TermFreq}s,
 * handing out each entry's term from {@link #next()} and its value from
 * {@link #weight()}.
 */
public final class TermFreqArrayIterator implements TermFreqIterator {
  /** Source of the entries still to be visited. */
  private final Iterator<TermFreq> entries;

  /** Single instance returned by every successful {@link #next()} call. */
  private final BytesRef scratch = new BytesRef();

  /** Entry most recently pulled by {@link #next()}; {@code null} until the first pull. */
  private TermFreq current;

  /**
   * Iterates over the entries produced by the given iterator.
   *
   * @param i iterator supplying the entries
   */
  public TermFreqArrayIterator(Iterator<TermFreq> i) {
    this.entries = i;
  }

  /**
   * Iterates over the entries of the given array, in array order.
   *
   * @param i array of entries
   */
  public TermFreqArrayIterator(TermFreq [] i) {
    this(Arrays.asList(i).iterator());
  }

  /**
   * Iterates over the entries of the given iterable.
   *
   * @param i iterable supplying the entries
   */
  public TermFreqArrayIterator(Iterable<TermFreq> i) {
    this(i.iterator());
  }

  /**
   * Returns the value of the entry most recently returned by {@link #next()}.
   * Calling this before the first {@link #next()} dereferences a
   * {@code null} entry.
   */
  @Override
  public long weight() {
    return current.v;
  }

  /**
   * Advances to the next entry.
   *
   * @return the shared scratch {@link BytesRef} after the entry's term has
   *         been passed to its {@code copyBytes}, or {@code null} when no
   *         entries remain
   */
  @Override
  public BytesRef next() {
    if (!entries.hasNext()) {
      // Exhausted: leave 'current' pointing at the last entry seen.
      return null;
    }
    current = entries.next();
    scratch.copyBytes(current.term);
    return scratch;
  }
}

View File

@ -23,14 +23,32 @@ public final class TermFreqPayload {
public final BytesRef term;
public final long v;
public final BytesRef payload;
public final boolean hasPayloads;
public TermFreqPayload(String term, long v, BytesRef payload) {
this(new BytesRef(term), v, payload);
public TermFreqPayload(BytesRef term, long v, BytesRef payload) {
this(term, v, payload, true);
}
public TermFreqPayload(BytesRef term, long v, BytesRef payload) {
public TermFreqPayload(String term, long v, BytesRef payload) {
this(new BytesRef(term), v, payload, true);
}
public TermFreqPayload(BytesRef term, long v) {
this(term, v, null, false);
}
public TermFreqPayload(String term, long v) {
this(new BytesRef(term), v, null, false);
}
public TermFreqPayload(BytesRef term, long v, BytesRef payload, boolean hasPayloads) {
this.term = term;
this.v = v;
this.payload = payload;
this.hasPayloads = hasPayloads;
}
public boolean hasPayloads() {
return hasPayloads;
}
}

View File

@ -20,26 +20,33 @@ package org.apache.lucene.search.suggest;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
/**
* A {@link TermFreqIterator} over a sequence of {@link TermFreq}s.
* A {@link TermFreqPayloadIterator} over a sequence of {@link TermFreqPayload}s.
*/
public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterator {
private final Iterator<TermFreqPayload> i;
private final boolean hasPayloads;
private boolean first;
private TermFreqPayload current;
private final BytesRef spare = new BytesRef();
public TermFreqPayloadArrayIterator(Iterator<TermFreqPayload> i) {
this.i = i;
if (i.hasNext()) {
current = i.next();
first = true;
this.hasPayloads = current.hasPayloads;
} else {
this.hasPayloads = false;
}
}
public TermFreqPayloadArrayIterator(TermFreqPayload[] i) {
this(Arrays.asList(i));
}
public TermFreqPayloadArrayIterator(Iterable<TermFreqPayload> i) {
this(i.iterator());
}
@ -51,8 +58,12 @@ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterat
@Override
public BytesRef next() {
if (i.hasNext()) {
current = i.next();
if (i.hasNext() || (first && current!=null)) {
if (first) {
first = false;
} else {
current = i.next();
}
spare.copyBytes(current.term);
return spare;
}
@ -63,4 +74,9 @@ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterat
public BytesRef payload() {
return current.payload;
}
@Override
public boolean hasPayloads() {
return hasPayloads;
}
}

View File

@ -1,92 +0,0 @@
package org.apache.lucene.search.suggest;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
 * Exercises {@link SortedTermFreqIteratorWrapper} and
 * {@link UnsortedTermFreqIteratorWrapper} on top of a
 * {@link TermFreqArrayIterator}.
 */
public class TestTermFreqIterator extends LuceneTestCase {

  /** Both wrappers must report no terms when the underlying input is empty. */
  public void testEmpty() throws Exception {
    TermFreqArrayIterator empty = new TermFreqArrayIterator(new TermFreq[0]);

    TermFreqIterator wrapped = new SortedTermFreqIteratorWrapper(empty, BytesRef.getUTF8SortedAsUnicodeComparator());
    assertNull(wrapped.next());

    // The same (already consumed) empty iterator is handed to the second wrapper.
    wrapped = new UnsortedTermFreqIteratorWrapper(empty);
    assertNull(wrapped.next());
  }

  /**
   * Feeds unique random terms with random weights through both wrappers and
   * checks that the sorted wrapper yields them in comparator order and that
   * the unsorted wrapper yields the same term/weight set.
   */
  public void testTerms() throws Exception {
    Random random = random();
    int num = atLeast(10000);

    // Randomly pick one of the two supported sort orders.
    Comparator<BytesRef> comparator;
    if (random.nextBoolean()) {
      comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
    } else {
      comparator = BytesRef.getUTF8SortedAsUTF16Comparator();
    }

    TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>(comparator);
    TermFreq[] unsorted = new TermFreq[num];
    for (int i = 0; i < num; i++) {
      // Draw terms until one is found that has not been used yet.
      BytesRef key = new BytesRef(_TestUtil.randomUnicodeString(random));
      while (sorted.containsKey(key)) {
        key = new BytesRef(_TestUtil.randomUnicodeString(random));
      }
      long value = random.nextLong();
      sorted.put(key, value);
      unsorted[i] = new TermFreq(key, value);
    }

    // test the sorted iterator wrapper
    TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator);
    for (Map.Entry<BytesRef,Long> entry : sorted.entrySet()) {
      assertEquals(entry.getKey(), wrapper.next());
      assertEquals(entry.getValue().longValue(), wrapper.weight());
    }
    assertNull(wrapper.next());

    // test the unsorted iterator wrapper
    wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted));
    TreeMap<BytesRef,Long> actual = new TreeMap<BytesRef,Long>();
    for (BytesRef term = wrapper.next(); term != null; term = wrapper.next()) {
      long value = wrapper.weight();
      // Deep-copy the term before storing it as a map key.
      actual.put(BytesRef.deepCopyOf(term), value);
    }
    assertEquals(sorted, actual);
  }

  /**
   * Reads eight bytes starting at {@code b.offset} as a big-endian
   * {@code long}.
   */
  public static long asLong(BytesRef b) {
    long high = (long) asIntInternal(b, b.offset) << 32;
    long low = asIntInternal(b, b.offset + 4) & 0xFFFFFFFFL;
    return high | low;
  }

  /**
   * Reads four bytes of {@code b.bytes} starting at {@code pos} as a
   * big-endian {@code int}.
   */
  private static int asIntInternal(BytesRef b, int pos) {
    int b0 = b.bytes[pos] & 0xFF;
    int b1 = b.bytes[pos + 1] & 0xFF;
    int b2 = b.bytes[pos + 2] & 0xFF;
    int b3 = b.bytes[pos + 3] & 0xFF;
    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
  }
}

View File

@ -0,0 +1,124 @@
package org.apache.lucene.search.suggest;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.util.AbstractMap.SimpleEntry;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
 * Exercises {@link SortedTermFreqPayloadIteratorWrapper} and
 * {@link UnsortedTermFreqPayloadIteratorWrapper} on top of a
 * {@link TermFreqPayloadArrayIterator}, both with and without payloads.
 */
public class TestTermFreqPayloadIterator extends LuceneTestCase {

  /** Both wrappers must report no terms when the underlying input is empty. */
  public void testEmpty() throws Exception {
    TermFreqPayloadArrayIterator empty = new TermFreqPayloadArrayIterator(new TermFreqPayload[0]);

    TermFreqPayloadIterator wrapped = new SortedTermFreqPayloadIteratorWrapper(empty, BytesRef.getUTF8SortedAsUnicodeComparator());
    assertNull(wrapped.next());

    // The same (already consumed) empty iterator is handed to the second wrapper.
    wrapped = new UnsortedTermFreqPayloadIteratorWrapper(empty);
    assertNull(wrapped.next());
  }

  /**
   * Feeds unique random terms (with random weights and payloads) through both
   * wrappers, once with payloads attached and once without, and checks the
   * terms, weights and payloads that come back out.
   */
  public void testTerms() throws Exception {
    Random random = random();
    int num = atLeast(10000);

    // Randomly pick one of the two supported sort orders.
    Comparator<BytesRef> comparator;
    if (random.nextBoolean()) {
      comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
    } else {
      comparator = BytesRef.getUTF8SortedAsUTF16Comparator();
    }

    TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> sorted = new TreeMap<>(comparator);
    TreeMap<BytesRef, Long> sortedWithoutPayload = new TreeMap<>(comparator);
    TermFreqPayload[] unsorted = new TermFreqPayload[num];
    TermFreqPayload[] unsortedWithoutPayload = new TermFreqPayload[num];
    for (int i = 0; i < num; i++) {
      // Draw a term/payload pair until the term has not been used yet; a
      // fresh payload is drawn on every attempt.
      BytesRef key = new BytesRef(_TestUtil.randomUnicodeString(random));
      BytesRef payload = new BytesRef(_TestUtil.randomUnicodeString(random));
      while (sorted.containsKey(key)) {
        key = new BytesRef(_TestUtil.randomUnicodeString(random));
        payload = new BytesRef(_TestUtil.randomUnicodeString(random));
      }
      long value = random.nextLong();
      sortedWithoutPayload.put(key, value);
      sorted.put(key, new SimpleEntry<>(value, payload));
      unsorted[i] = new TermFreqPayload(key, value, payload);
      unsortedWithoutPayload[i] = new TermFreqPayload(key, value);
    }

    // test the sorted iterator wrapper with payloads
    TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted), comparator);
    for (Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>> entry : sorted.entrySet()) {
      assertEquals(entry.getKey(), wrapper.next());
      assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
      assertEquals(entry.getValue().getValue(), wrapper.payload());
    }
    assertNull(wrapper.next());

    // test the unsorted iterator wrapper with payloads
    wrapper = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted));
    TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>();
    for (BytesRef term = wrapper.next(); term != null; term = wrapper.next()) {
      long value = wrapper.weight();
      BytesRef payload = wrapper.payload();
      // Deep-copy term and payload before storing them in the map.
      actual.put(BytesRef.deepCopyOf(term), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload)));
    }
    assertEquals(sorted, actual);

    // test the sorted iterator wrapper without payloads
    TermFreqPayloadIterator wrapperWithoutPayload = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload), comparator);
    for (Map.Entry<BytesRef, Long> entry : sortedWithoutPayload.entrySet()) {
      assertEquals(entry.getKey(), wrapperWithoutPayload.next());
      assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
      assertNull(wrapperWithoutPayload.payload());
    }
    assertNull(wrapperWithoutPayload.next());

    // test the unsorted iterator wrapper without payloads
    wrapperWithoutPayload = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload));
    TreeMap<BytesRef, Long> actualWithoutPayload = new TreeMap<>();
    for (BytesRef term = wrapperWithoutPayload.next(); term != null; term = wrapperWithoutPayload.next()) {
      long value = wrapperWithoutPayload.weight();
      assertNull(wrapperWithoutPayload.payload());
      actualWithoutPayload.put(BytesRef.deepCopyOf(term), value);
    }
    assertEquals(sortedWithoutPayload, actualWithoutPayload);
  }

  /**
   * Reads eight bytes starting at {@code b.offset} as a big-endian
   * {@code long}.
   */
  public static long asLong(BytesRef b) {
    long high = (long) asIntInternal(b, b.offset) << 32;
    long low = asIntInternal(b, b.offset + 4) & 0xFFFFFFFFL;
    return high | low;
  }

  /**
   * Reads four bytes of {@code b.bytes} starting at {@code pos} as a
   * big-endian {@code int}.
   */
  private static int asIntInternal(BytesRef b, int pos) {
    int b0 = b.bytes[pos] & 0xFF;
    int b1 = b.bytes[pos + 1] & 0xFF;
    int b2 = b.bytes[pos + 2] & 0xFF;
    int b3 = b.bytes[pos + 3] & 0xFF;
    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
  }
}

View File

@ -52,8 +52,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreq;
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.util.BytesRef;
@ -65,18 +63,18 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
Iterable<TermFreq> keys = shuffle(
new TermFreq("foo", 50),
new TermFreq("bar", 10),
new TermFreq("barbar", 10),
new TermFreq("barbar", 12),
new TermFreq("barbara", 6),
new TermFreq("bar", 5),
new TermFreq("barbara", 1)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("foo", 50),
new TermFreqPayload("bar", 10),
new TermFreqPayload("barbar", 10),
new TermFreqPayload("barbar", 12),
new TermFreqPayload("barbara", 6),
new TermFreqPayload("bar", 5),
new TermFreqPayload("barbara", 1)
);
AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2);
@ -165,14 +163,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testRandomRealisticKeys() throws IOException {
LineFileDocs lineFile = new LineFileDocs(random());
Map<String, Long> mapping = new HashMap<>();
List<TermFreq> keys = new ArrayList<>();
List<TermFreqPayload> keys = new ArrayList<>();
int howMany = atLeast(100); // this might bring up duplicates
for (int i = 0; i < howMany; i++) {
Document nextDoc = lineFile.nextDoc();
String title = nextDoc.getField("title").stringValue();
int randomWeight = random().nextInt(100);
keys.add(new TermFreq(title, randomWeight));
keys.add(new TermFreqPayload(title, randomWeight));
if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) {
mapping.put(title, Long.valueOf(randomWeight));
}
@ -183,15 +181,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
boolean doPayloads = random().nextBoolean();
if (doPayloads) {
List<TermFreqPayload> keysAndPayloads = new ArrayList<>();
for (TermFreq termFreq : keys) {
for (TermFreqPayload termFreq : keys) {
keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
}
analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads));
} else {
analyzingSuggester.build(new TermFreqArrayIterator(keys));
analyzingSuggester.build(new TermFreqPayloadArrayIterator(keys));
}
for (TermFreq termFreq : keys) {
for (TermFreqPayload termFreq : keys) {
List<LookupResult> lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
for (LookupResult lookupResult : lookup) {
assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value));
@ -211,14 +209,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
* basic "standardanalyzer" test with stopword removal
*/
public void testStandard() throws Exception {
TermFreq keys[] = new TermFreq[] {
new TermFreq("the ghost of christmas past", 50),
TermFreqPayload keys[] = new TermFreqPayload[] {
new TermFreqPayload("the ghost of christmas past", 50),
};
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
suggester.setPreservePositionIncrements(false);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
assertEquals(1, results.size());
@ -241,23 +239,23 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testEmpty() throws Exception {
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
}
public void testNoSeps() throws Exception {
TermFreq[] keys = new TermFreq[] {
new TermFreq("ab cd", 0),
new TermFreq("abcd", 1),
TermFreqPayload[] keys = new TermFreqPayload[] {
new TermFreqPayload("ab cd", 0),
new TermFreqPayload("abcd", 1),
};
int options = 0;
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
// TODO: would be nice if "ab " would allow the test to
// pass, and more generally if the analyzer can know
// that the user's current query has ended at a word,
@ -318,13 +316,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
};
TermFreq keys[] = new TermFreq[] {
new TermFreq("wifi network is slow", 50),
new TermFreq("wi fi network is fast", 10),
TermFreqPayload keys[] = new TermFreqPayload[] {
new TermFreqPayload("wifi network is slow", 50),
new TermFreqPayload("wi fi network is fast", 10),
};
//AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
if (VERBOSE) {
System.out.println("Results: " + results);
@ -384,12 +382,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
};
TermFreq keys[] = new TermFreq[] {
new TermFreq("ab xc", 50),
new TermFreq("ba xd", 50),
TermFreqPayload keys[] = new TermFreqPayload[] {
new TermFreqPayload("ab xc", 50),
new TermFreqPayload("ba xd", 50),
};
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
}
@ -462,11 +460,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("x y", 1),
new TermFreq("x y z", 3),
new TermFreq("x", 2),
new TermFreq("z z z", 20),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("x y", 1),
new TermFreqPayload("x y z", 3),
new TermFreqPayload("x", 2),
new TermFreqPayload("z z z", 20),
}));
//System.out.println("ALL: " + suggester.lookup("x y", false, 6));
@ -502,11 +500,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("x y", 1),
new TermFreq("x y z", 3),
new TermFreq("x", 2),
new TermFreq("z z z", 20),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("x y", 1),
new TermFreqPayload("x y z", 3),
new TermFreqPayload("x", 2),
new TermFreqPayload("z z z", 20),
}));
for(int topN=1;topN<6;topN++) {
@ -657,12 +655,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
boolean doPayloads = random().nextBoolean();
TermFreq[] keys = null;
TermFreqPayload[] keys = null;
TermFreqPayload[] payloadKeys = null;
if (doPayloads) {
payloadKeys = new TermFreqPayload[numQueries];
} else {
keys = new TermFreq[numQueries];
keys = new TermFreqPayload[numQueries];
}
boolean preserveSep = random().nextBoolean();
@ -735,7 +733,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
payload = new BytesRef(bytes);
payloadKeys[i] = new TermFreqPayload(key, weight, payload);
} else {
keys[i] = new TermFreq(key, weight);
keys[i] = new TermFreqPayload(key, weight);
payload = null;
}
@ -758,7 +756,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
if (doPayloads) {
suggester.build(new TermFreqPayloadArrayIterator(shuffle(payloadKeys)));
} else {
suggester.build(new TermFreqArrayIterator(shuffle(keys)));
suggester.build(new TermFreqPayloadArrayIterator(shuffle(keys)));
}
for (String prefix : allPrefixes) {
@ -876,8 +874,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1);
suggester.build(new TermFreqArrayIterator(shuffle(new TermFreq("a", 40),
new TermFreq("a ", 50), new TermFreq(" a", 60))));
suggester.build(new TermFreqPayloadArrayIterator(shuffle(new TermFreqPayload("a", 40),
new TermFreqPayload("a ", 50), new TermFreqPayload(" a", 60))));
List<LookupResult> results = suggester.lookup("a", false, 5);
assertEquals(2, results.size());
@ -891,11 +889,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("a", 2),
new TermFreq("a b c", 3),
new TermFreq("a c a", 1),
new TermFreq("a c b", 1),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("a", 2),
new TermFreqPayload("a b c", 3),
new TermFreqPayload("a c a", 1),
new TermFreqPayload("a c b", 1),
}));
suggester.lookup("a", false, 4);
@ -907,10 +905,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("a", 5),
new TermFreq("a b", 3),
new TermFreq("a c", 4),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("a", 5),
new TermFreqPayload("a b", 3),
new TermFreqPayload("a c", 4),
}));
List<LookupResult> results = suggester.lookup("a", false, 3);
@ -972,9 +970,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
suggester.build(new TermFreqArrayIterator(shuffle(
new TermFreq("hambone", 6),
new TermFreq("nellie", 5))));
suggester.build(new TermFreqPayloadArrayIterator(shuffle(
new TermFreqPayload("hambone", 6),
new TermFreqPayload("nellie", 5))));
List<LookupResult> results = suggester.lookup("nellie", false, 2);
assertEquals(2, results.size());
@ -1041,9 +1039,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("a", 6),
new TermFreq("b", 5),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("a", 6),
new TermFreqPayload("b", 5),
}));
List<LookupResult> results = suggester.lookup("a", false, 2);
@ -1114,21 +1112,21 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("a a", 50),
new TermFreq("a b", 50),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("a a", 50),
new TermFreqPayload("a b", 50),
}));
}
public void testDupSurfaceFormsMissingResults3() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("a a", 7),
new TermFreq("a a", 7),
new TermFreq("a c", 6),
new TermFreq("a c", 3),
new TermFreq("a b", 5),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("a a", 7),
new TermFreqPayload("a a", 7),
new TermFreqPayload("a c", 6),
new TermFreqPayload("a c", 3),
new TermFreqPayload("a b", 5),
}));
assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString());
}
@ -1136,9 +1134,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testEndingSpace() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("i love lucy", 7),
new TermFreq("isla de muerta", 8),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("i love lucy", 7),
new TermFreqPayload("isla de muerta", 8),
}));
assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString());
assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString());
@ -1169,15 +1167,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {new TermFreq("a", 1)}));
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {new TermFreqPayload("a", 1)}));
assertEquals("[a/1]", suggester.lookup("a", false, 1).toString());
}
public void testIllegalLookupArgument() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("а где Люси?", 7),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("а где Люси?", 7),
}));
try {
suggester.lookup("а\u001E", false, 3);

View File

@ -41,8 +41,8 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreq;
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
@ -54,16 +54,16 @@ import org.apache.lucene.util.fst.Util;
public class FuzzySuggesterTest extends LuceneTestCase {
public void testRandomEdits() throws IOException {
List<TermFreq> keys = new ArrayList<TermFreq>();
List<TermFreqPayload> keys = new ArrayList<TermFreqPayload>();
int numTerms = atLeast(100);
for (int i = 0; i < numTerms; i++) {
keys.add(new TermFreq("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
keys.add(new TermFreqPayload("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
}
keys.add(new TermFreq("foo bar boo far", 12));
keys.add(new TermFreqPayload("foo bar boo far", 12));
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
@ -75,16 +75,16 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
public void testNonLatinRandomEdits() throws IOException {
List<TermFreq> keys = new ArrayList<TermFreq>();
List<TermFreqPayload> keys = new ArrayList<TermFreqPayload>();
int numTerms = atLeast(100);
for (int i = 0; i < numTerms; i++) {
keys.add(new TermFreq("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
keys.add(new TermFreqPayload("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
}
keys.add(new TermFreq("фуу бар буу фар", 12));
keys.add(new TermFreqPayload("фуу бар буу фар", 12));
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
String addRandomEdit = addRandomEdit("фуу бар буу", 0);
@ -97,15 +97,15 @@ public class FuzzySuggesterTest extends LuceneTestCase {
/** This is basically the WFST test ported to KeywordAnalyzer, so it acts the same. */
public void testKeyword() throws Exception {
TermFreq keys[] = new TermFreq[] {
new TermFreq("foo", 50),
new TermFreq("bar", 10),
new TermFreq("barbar", 12),
new TermFreq("barbara", 6)
TermFreqPayload keys[] = new TermFreqPayload[] {
new TermFreqPayload("foo", 50),
new TermFreqPayload("bar", 10),
new TermFreqPayload("barbar", 12),
new TermFreqPayload("barbara", 6)
};
FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("bariar", random()), false, 2);
assertEquals(2, results.size());
@ -172,14 +172,14 @@ public class FuzzySuggesterTest extends LuceneTestCase {
* basic "standardanalyzer" test with stopword removal
*/
public void testStandard() throws Exception {
TermFreq keys[] = new TermFreq[] {
new TermFreq("the ghost of christmas past", 50),
TermFreqPayload keys[] = new TermFreqPayload[] {
new TermFreqPayload("the ghost of christmas past", 50),
};
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
FuzzySuggester suggester = new FuzzySuggester(standard);
suggester.setPreservePositionIncrements(false);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
assertEquals(1, results.size());
@ -200,16 +200,16 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
public void testNoSeps() throws Exception {
TermFreq[] keys = new TermFreq[] {
new TermFreq("ab cd", 0),
new TermFreq("abcd", 1),
TermFreqPayload[] keys = new TermFreqPayload[] {
new TermFreqPayload("ab cd", 0),
new TermFreqPayload("abcd", 1),
};
int options = 0;
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3, false);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
// TODO: would be nice if "ab " would allow the test to
// pass, and more generally if the analyzer can know
// that the user's current query has ended at a word,
@ -270,12 +270,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
};
TermFreq keys[] = new TermFreq[] {
new TermFreq("wifi network is slow", 50),
new TermFreq("wi fi network is fast", 10),
TermFreqPayload keys[] = new TermFreqPayload[] {
new TermFreqPayload("wifi network is slow", 50),
new TermFreqPayload("wi fi network is fast", 10),
};
FuzzySuggester suggester = new FuzzySuggester(analyzer);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
if (VERBOSE) {
@ -290,7 +290,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
public void testEmpty() throws Exception {
FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
@ -344,12 +344,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
};
TermFreq keys[] = new TermFreq[] {
new TermFreq("ab xc", 50),
new TermFreq("ba xd", 50),
TermFreqPayload keys[] = new TermFreqPayload[] {
new TermFreqPayload("ab xc", 50),
new TermFreqPayload("ba xd", 50),
};
FuzzySuggester suggester = new FuzzySuggester(analyzer);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
}
@ -418,11 +418,11 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("x y", 1),
new TermFreq("x y z", 3),
new TermFreq("x", 2),
new TermFreq("z z z", 20),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("x y", 1),
new TermFreqPayload("x y z", 3),
new TermFreqPayload("x", 2),
new TermFreqPayload("z z z", 20),
}));
//System.out.println("ALL: " + suggester.lookup("x y", false, 6));
@ -458,11 +458,11 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("x y", 1),
new TermFreq("x y z", 3),
new TermFreq("x", 2),
new TermFreq("z z z", 20),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("x y", 1),
new TermFreqPayload("x y z", 3),
new TermFreqPayload("x", 2),
new TermFreqPayload("z z z", 20),
}));
for(int topN=1;topN<6;topN++) {
@ -491,19 +491,19 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
// Holds surface form separately:
private static class TermFreq2 implements Comparable<TermFreq2> {
private static class TermFreqPayload2 implements Comparable<TermFreqPayload2> {
public final String surfaceForm;
public final String analyzedForm;
public final long weight;
// Stores the surface form, its analyzed form and the weight exactly as passed in.
public TermFreq2(String surfaceForm, String analyzedForm, long weight) {
public TermFreqPayload2(String surfaceForm, String analyzedForm, long weight) {
this.surfaceForm = surfaceForm;
this.analyzedForm = analyzedForm;
this.weight = weight;
}
@Override
public int compareTo(TermFreq2 other) {
public int compareTo(TermFreqPayload2 other) {
int cmp = analyzedForm.compareTo(other.analyzedForm);
if (cmp != 0) {
return cmp;
@ -596,11 +596,11 @@ public class FuzzySuggesterTest extends LuceneTestCase {
int numQueries = atLeast(100);
final List<TermFreq2> slowCompletor = new ArrayList<TermFreq2>();
final List<TermFreqPayload2> slowCompletor = new ArrayList<TermFreqPayload2>();
final TreeSet<String> allPrefixes = new TreeSet<String>();
final Set<String> seen = new HashSet<String>();
TermFreq[] keys = new TermFreq[numQueries];
TermFreqPayload[] keys = new TermFreqPayload[numQueries];
boolean preserveSep = random().nextBoolean();
boolean unicodeAware = random().nextBoolean();
@ -666,17 +666,17 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random().nextInt(1<<24);
keys[i] = new TermFreq(key, weight);
keys[i] = new TermFreqPayload(key, weight);
slowCompletor.add(new TermFreq2(key, analyzedKey, weight));
slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight));
}
if (VERBOSE) {
// Don't just sort original list, to avoid VERBOSE
// altering the test:
List<TermFreq2> sorted = new ArrayList<TermFreq2>(slowCompletor);
List<TermFreqPayload2> sorted = new ArrayList<TermFreqPayload2>(slowCompletor);
Collections.sort(sorted);
for(TermFreq2 ent : sorted) {
for(TermFreqPayload2 ent : sorted) {
System.out.println(" surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
}
}
@ -684,7 +684,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
FuzzySuggester suggester = new FuzzySuggester(a, a,
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, 1, false, 1, 3, unicodeAware);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
for (String prefix : allPrefixes) {
@ -756,7 +756,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertTrue(automaton.isDeterministic());
// TODO: could be faster... but it's slowCompletor for a reason
BytesRef spare = new BytesRef();
for (TermFreq2 e : slowCompletor) {
for (TermFreqPayload2 e : slowCompletor) {
spare.copyChars(e.analyzedForm);
Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
for (IntsRef intsRef : finiteStrings) {
@ -825,14 +825,14 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, 1, true, 1, 3, false);
List<TermFreq> keys = Arrays.asList(new TermFreq[] {
new TermFreq("a", 40),
new TermFreq("a ", 50),
new TermFreq(" a", 60),
List<TermFreqPayload> keys = Arrays.asList(new TermFreqPayload[] {
new TermFreqPayload("a", 40),
new TermFreqPayload("a ", 50),
new TermFreqPayload(" a", 60),
});
Collections.shuffle(keys, random());
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup("a", false, 5);
assertEquals(2, results.size());
@ -846,15 +846,15 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, 2, true, 1, 3, false);
List<TermFreq> keys = Arrays.asList(new TermFreq[] {
new TermFreq("foo bar", 40),
new TermFreq("foo bar baz", 50),
new TermFreq("barbaz", 60),
new TermFreq("barbazfoo", 10),
List<TermFreqPayload> keys = Arrays.asList(new TermFreqPayload[] {
new TermFreqPayload("foo bar", 40),
new TermFreqPayload("foo bar baz", 50),
new TermFreqPayload("barbaz", 60),
new TermFreqPayload("barbazfoo", 10),
});
Collections.shuffle(keys, random());
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString());
assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString());
@ -929,25 +929,25 @@ public class FuzzySuggesterTest extends LuceneTestCase {
public void testRandom2() throws Throwable {
final int NUM = atLeast(200);
final List<TermFreq> answers = new ArrayList<TermFreq>();
final List<TermFreqPayload> answers = new ArrayList<TermFreqPayload>();
final Set<String> seen = new HashSet<String>();
for(int i=0;i<NUM;i++) {
final String s = randomSimpleString(8);
if (!seen.contains(s)) {
answers.add(new TermFreq(s, random().nextInt(1000)));
answers.add(new TermFreqPayload(s, random().nextInt(1000)));
seen.add(s);
}
}
Collections.sort(answers, new Comparator<TermFreq>() {
Collections.sort(answers, new Comparator<TermFreqPayload>() {
// Orders entries by term only (term.compareTo); the value v is not consulted.
@Override
public int compare(TermFreq a, TermFreq b) {
public int compare(TermFreqPayload a, TermFreqPayload b) {
return a.term.compareTo(b.term);
}
});
if (VERBOSE) {
System.out.println("\nTEST: targets");
for(TermFreq tf : answers) {
for(TermFreqPayload tf : answers) {
System.out.println(" " + tf.term.utf8ToString() + " freq=" + tf.v);
}
}
@ -965,7 +965,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
Collections.shuffle(answers, random());
suggest.build(new TermFreqArrayIterator(answers.toArray(new TermFreq[answers.size()])));
suggest.build(new TermFreqPayloadArrayIterator(answers.toArray(new TermFreqPayload[answers.size()])));
final int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
@ -1004,10 +1004,10 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
}
private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<TermFreq> answers, String frag) {
private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<TermFreqPayload> answers, String frag) {
final List<LookupResult> results = new ArrayList<LookupResult>();
final int fragLen = frag.length();
for(TermFreq tf : answers) {
for(TermFreqPayload tf : answers) {
//System.out.println(" check s=" + tf.term.utf8ToString());
boolean prefixMatches = true;
for(int i=0;i<prefixLen;i++) {

View File

@ -41,10 +41,10 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreq;
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
@ -54,14 +54,14 @@ import org.junit.Ignore;
public class TestFreeTextSuggester extends LuceneTestCase {
public void testBasic() throws Exception {
Iterable<TermFreq> keys = shuffle(
new TermFreq("foo bar baz blah", 50),
new TermFreq("boo foo bar foo bee", 20)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("foo bar baz blah", 50),
new TermFreqPayload("boo foo bar foo bee", 20)
);
Analyzer a = new MockAnalyzer(random());
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
sug.build(new TermFreqArrayIterator(keys));
sug.build(new TermFreqPayloadArrayIterator(keys));
for(int i=0;i<2;i++) {
@ -101,12 +101,12 @@ public class TestFreeTextSuggester extends LuceneTestCase {
public void testIllegalByteDuringBuild() throws Exception {
// Default separator is INFORMATION SEPARATOR TWO
// (0x1e), so no input token is allowed to contain it
Iterable<TermFreq> keys = shuffle(
new TermFreq("foo\u001ebar baz", 50)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("foo\u001ebar baz", 50)
);
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
try {
sug.build(new TermFreqArrayIterator(keys));
sug.build(new TermFreqPayloadArrayIterator(keys));
fail("did not hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
@ -116,11 +116,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
public void testIllegalByteDuringQuery() throws Exception {
// Default separator is INFORMATION SEPARATOR TWO
// (0x1e), so no input token is allowed to contain it
Iterable<TermFreq> keys = shuffle(
new TermFreq("foo bar baz", 50)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("foo bar baz", 50)
);
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
sug.build(new TermFreqArrayIterator(keys));
sug.build(new TermFreqPayloadArrayIterator(keys));
try {
sug.lookup("foo\u001eb", 10);
@ -136,7 +136,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
// Skip header:
lfd.nextDoc();
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
sug.build(new TermFreqIterator() {
sug.build(new TermFreqPayloadIterator() {
private int count;
@ -161,6 +161,16 @@ public class TestFreeTextSuggester extends LuceneTestCase {
}
return new BytesRef(doc.get("body"));
}
@Override
public BytesRef payload() {
// This iterator supplies no payloads (hasPayloads() returns false), so there is nothing to return.
return null;
}
@Override
public boolean hasPayloads() {
// payload() always returns null here, so report that no payloads are present.
return false;
}
});
if (VERBOSE) {
System.out.println(sug.sizeInBytes() + " bytes");
@ -175,13 +185,13 @@ public class TestFreeTextSuggester extends LuceneTestCase {
// Make sure you can suggest based only on unigram model:
public void testUnigrams() throws Exception {
Iterable<TermFreq> keys = shuffle(
new TermFreq("foo bar baz blah boo foo bar foo bee", 50)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("foo bar baz blah boo foo bar foo bee", 50)
);
Analyzer a = new MockAnalyzer(random());
FreeTextSuggester sug = new FreeTextSuggester(a, a, 1, (byte) 0x20);
sug.build(new TermFreqArrayIterator(keys));
sug.build(new TermFreqPayloadArrayIterator(keys));
// Sorts first by count, descending, second by term, ascending
assertEquals("bar/0.22 baz/0.11 bee/0.11 blah/0.11 boo/0.11",
toString(sug.lookup("b", 10)));
@ -189,24 +199,24 @@ public class TestFreeTextSuggester extends LuceneTestCase {
// Make sure the last token is not duplicated
public void testNoDupsAcrossGrams() throws Exception {
// Single input entry in which the token "bar" repeats several times.
Iterable<TermFreq> keys = shuffle(
new TermFreq("foo bar bar bar bar", 50)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("foo bar bar bar bar", 50)
);
Analyzer a = new MockAnalyzer(random());
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
sug.build(new TermFreqArrayIterator(keys));
sug.build(new TermFreqPayloadArrayIterator(keys));
// Despite the repeated "bar", the prefix "foo b" must produce exactly one suggestion.
assertEquals("foo bar/1.00",
toString(sug.lookup("foo b", 10)));
}
// Lookup of just empty string produces unicode only matches:
public void testEmptyString() throws Exception {
Iterable<TermFreq> keys = shuffle(
new TermFreq("foo bar bar bar bar", 50)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("foo bar bar bar bar", 50)
);
Analyzer a = new MockAnalyzer(random());
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
sug.build(new TermFreqArrayIterator(keys));
sug.build(new TermFreqPayloadArrayIterator(keys));
try {
sug.lookup("", 10);
fail("did not hit exception");
@ -228,11 +238,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
}
};
Iterable<TermFreq> keys = shuffle(
new TermFreq("wizard of oz", 50)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("wizard of oz", 50)
);
FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
sug.build(new TermFreqArrayIterator(keys));
sug.build(new TermFreqPayloadArrayIterator(keys));
assertEquals("wizard _ oz/1.00",
toString(sug.lookup("wizard of", 10)));
@ -256,11 +266,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
}
};
Iterable<TermFreq> keys = shuffle(
new TermFreq("wizard of of oz", 50)
Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("wizard of of oz", 50)
);
FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
sug.build(new TermFreqArrayIterator(keys));
sug.build(new TermFreqPayloadArrayIterator(keys));
assertEquals("",
toString(sug.lookup("wizard of of", 10)));
}
@ -320,7 +330,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
// Build suggester model:
FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte) 0x20);
sug.build(new TermFreqIterator() {
sug.build(new TermFreqPayloadIterator() {
int upto;
@Override
@ -342,6 +352,16 @@ public class TestFreeTextSuggester extends LuceneTestCase {
public long weight() {
// A fresh random long is drawn from random() on every call.
return random().nextLong();
}
@Override
public BytesRef payload() {
// This iterator supplies no payloads (hasPayloads() returns false), so there is nothing to return.
return null;
}
@Override
public boolean hasPayloads() {
// payload() always returns null here, so report that no payloads are present.
return false;
}
});
// Build inefficient but hopefully correct model:

View File

@ -28,8 +28,8 @@ import org.apache.lucene.util.*;
* Unit tests for {@link FSTCompletion}.
*/
public class FSTCompletionTest extends LuceneTestCase {
/** Shorthand factory for test entries: wraps term {@code t} and value {@code v} in a new entry object. */
public static TermFreq tf(String t, int v) {
return new TermFreq(t, v);
public static TermFreqPayload tf(String t, int v) {
return new TermFreqPayload(t, v);
}
private FSTCompletion completion;
@ -40,15 +40,15 @@ public class FSTCompletionTest extends LuceneTestCase {
super.setUp();
FSTCompletionBuilder builder = new FSTCompletionBuilder();
for (TermFreq tf : evalKeys()) {
for (TermFreqPayload tf : evalKeys()) {
builder.add(tf.term, (int) tf.v);
}
completion = builder.build();
completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
}
private TermFreq[] evalKeys() {
final TermFreq[] keys = new TermFreq[] {
private TermFreqPayload[] evalKeys() {
final TermFreqPayload[] keys = new TermFreqPayload[] {
tf("one", 0),
tf("oneness", 1),
tf("onerous", 1),
@ -157,17 +157,17 @@ public class FSTCompletionTest extends LuceneTestCase {
FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);
Random r = random();
List<TermFreq> keys = new ArrayList<TermFreq>();
List<TermFreqPayload> keys = new ArrayList<TermFreqPayload>();
for (int i = 0; i < 5000; i++) {
keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
keys.add(new TermFreqPayload(_TestUtil.randomSimpleString(r), -1));
}
lookup.build(new TermFreqArrayIterator(keys));
lookup.build(new TermFreqPayloadArrayIterator(keys));
// All the weights were constant, so all returned buckets must be constant, whatever they
// are.
Long previous = null;
for (TermFreq tf : keys) {
for (TermFreqPayload tf : keys) {
Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue();
if (previous != null) {
assertEquals(previous, current);
@ -177,11 +177,11 @@ public class FSTCompletionTest extends LuceneTestCase {
}
public void testMultilingualInput() throws Exception {
List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();
List<TermFreqPayload> input = LookupBenchmarkTest.readTop50KWiki();
FSTCompletionLookup lookup = new FSTCompletionLookup();
lookup.build(new TermFreqArrayIterator(input));
for (TermFreq tf : input) {
lookup.build(new TermFreqPayloadArrayIterator(input));
for (TermFreqPayload tf : input) {
assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random())));
assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString());
}
@ -198,17 +198,17 @@ public class FSTCompletionTest extends LuceneTestCase {
}
public void testRandom() throws Exception {
List<TermFreq> freqs = new ArrayList<TermFreq>();
List<TermFreqPayload> freqs = new ArrayList<TermFreqPayload>();
Random rnd = random();
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
int weight = rnd.nextInt(100);
freqs.add(new TermFreq("" + rnd.nextLong(), weight));
freqs.add(new TermFreqPayload("" + rnd.nextLong(), weight));
}
FSTCompletionLookup lookup = new FSTCompletionLookup();
lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));
lookup.build(new TermFreqPayloadArrayIterator(freqs.toArray(new TermFreqPayload[freqs.size()])));
for (TermFreq tf : freqs) {
for (TermFreqPayload tf : freqs) {
final String term = tf.term.utf8ToString();
for (int i = 1; i < term.length(); i++) {
String prefix = term.substring(0, i);

View File

@ -20,8 +20,8 @@ package org.apache.lucene.search.suggest.fst;
import java.util.*;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreq;
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@ -29,16 +29,16 @@ import org.apache.lucene.util._TestUtil;
public class WFSTCompletionTest extends LuceneTestCase {
public void testBasic() throws Exception {
TermFreq keys[] = new TermFreq[] {
new TermFreq("foo", 50),
new TermFreq("bar", 10),
new TermFreq("barbar", 12),
new TermFreq("barbara", 6)
TermFreqPayload keys[] = new TermFreqPayload[] {
new TermFreqPayload("foo", 50),
new TermFreqPayload("bar", 10),
new TermFreqPayload("barbar", 12),
new TermFreqPayload("barbara", 6)
};
Random random = new Random(random().nextLong());
WFSTCompletionLookup suggester = new WFSTCompletionLookup();
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
@ -81,9 +81,9 @@ public class WFSTCompletionTest extends LuceneTestCase {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("x y", 20),
new TermFreq("x", 2),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("x y", 20),
new TermFreqPayload("x", 2),
}));
for(int topN=1;topN<4;topN++) {
@ -105,9 +105,9 @@ public class WFSTCompletionTest extends LuceneTestCase {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("x y", 20),
new TermFreq("x", 2),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload("x y", 20),
new TermFreqPayload("x", 2),
}));
for(int topN=1;topN<4;topN++) {
@ -131,7 +131,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
final TreeMap<String,Long> slowCompletor = new TreeMap<String,Long>();
final TreeSet<String> allPrefixes = new TreeSet<String>();
TermFreq[] keys = new TermFreq[numWords];
TermFreqPayload[] keys = new TermFreqPayload[numWords];
for (int i = 0; i < numWords; i++) {
String s;
@ -150,11 +150,11 @@ public class WFSTCompletionTest extends LuceneTestCase {
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random().nextInt(1<<24);
slowCompletor.put(s, (long)weight);
keys[i] = new TermFreq(s, weight);
keys[i] = new TermFreqPayload(s, weight);
}
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
suggester.build(new TermFreqArrayIterator(keys));
suggester.build(new TermFreqPayloadArrayIterator(keys));
Random random = new Random(random().nextLong());
for (String prefix : allPrefixes) {
@ -205,16 +205,16 @@ public class WFSTCompletionTest extends LuceneTestCase {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq(key1, 50),
new TermFreq(key2, 50),
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
new TermFreqPayload(key1, 50),
new TermFreqPayload(key2, 50),
}));
}
/** Builds the suggester from a zero-length input array and checks that a lookup then returns no results. */
public void testEmpty() throws Exception {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
}