mirror of https://github.com/apache/lucene.git
LUCENE-5260: cutover all suggesters to TermFreqPayloadIterator
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1531664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
544d997664
commit
c1be5650a9
|
@ -150,6 +150,10 @@ API Changes:
|
|||
numBits parameter to allow growing/shrinking the copied bitset. You can
|
||||
use FixedBitSet.clone() if you only need to clone the bitset. (Shai Erera)
|
||||
|
||||
* LUCENE-5260: Use TermFreqPayloadIterator for all suggesters; those
|
||||
suggesters that can't support payloads will throw an exception if
|
||||
hasPayloads() is true. (Areek Zillur via Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-5225: The ToParentBlockJoinQuery only keeps track of the child
|
||||
|
|
|
@ -59,7 +59,7 @@ public class HighFrequencyDictionary implements Dictionary {
|
|||
return new HighFrequencyIterator();
|
||||
}
|
||||
|
||||
final class HighFrequencyIterator implements TermFreqIterator {
|
||||
final class HighFrequencyIterator implements TermFreqPayloadIterator {
|
||||
private final BytesRef spare = new BytesRef();
|
||||
private final TermsEnum termsEnum;
|
||||
private int minNumDocs;
|
||||
|
@ -98,5 +98,15 @@ public class HighFrequencyDictionary implements Dictionary {
|
|||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,58 +0,0 @@
|
|||
package org.apache.lucene.search.spell;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
|
||||
/**
|
||||
* Interface for enumerating term,weight pairs.
|
||||
*/
|
||||
public interface TermFreqIterator extends BytesRefIterator {
|
||||
|
||||
/** A term's weight, higher numbers mean better suggestions. */
|
||||
public long weight();
|
||||
|
||||
/**
|
||||
* Wraps a BytesRefIterator as a TermFreqIterator, with all weights
|
||||
* set to <code>1</code>
|
||||
*/
|
||||
public static class TermFreqIteratorWrapper implements TermFreqIterator {
|
||||
private BytesRefIterator wrapped;
|
||||
|
||||
/**
|
||||
* Creates a new wrapper, wrapping the specified iterator and
|
||||
* specifying a weight value of <code>1</code> for all terms.
|
||||
*/
|
||||
public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
|
||||
this.wrapped = wrapped;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long weight() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
return wrapped.next();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -17,20 +17,67 @@ package org.apache.lucene.search.spell;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
|
||||
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs
|
||||
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs
|
||||
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
|
||||
/**
|
||||
* Interface for enumerating term,weight,payload triples;
|
||||
* currently only {@link AnalyzingSuggester} and {@link
|
||||
* FuzzySuggester} support payloads.
|
||||
* currently only {@link AnalyzingSuggester}, {@link
|
||||
* FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads.
|
||||
*/
|
||||
public interface TermFreqPayloadIterator extends TermFreqIterator {
|
||||
public interface TermFreqPayloadIterator extends BytesRefIterator {
|
||||
|
||||
/** A term's weight, higher numbers mean better suggestions. */
|
||||
public long weight();
|
||||
|
||||
/** An arbitrary byte[] to record per suggestion. See
|
||||
* {@link LookupResult#payload} to retrieve the payload
|
||||
* for each suggestion. */
|
||||
public BytesRef payload();
|
||||
|
||||
/** Returns true if the iterator has payloads */
|
||||
public boolean hasPayloads();
|
||||
|
||||
/**
|
||||
* Wraps a BytesRefIterator as a TermFreqPayloadIterator, with all weights
|
||||
* set to <code>1</code> and no payloads.
|
||||
*/
|
||||
public static class TermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
|
||||
private final BytesRefIterator wrapped;
|
||||
|
||||
/**
|
||||
* Creates a new wrapper, wrapping the specified iterator and
|
||||
* specifying a weight value of <code>1</code> for all terms
|
||||
* and returning <code>null</code> for every payload.
|
||||
*/
|
||||
public TermFreqPayloadIteratorWrapper(BytesRefIterator wrapped) {
|
||||
this.wrapped = wrapped;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long weight() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
return wrapped.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,8 @@ package org.apache.lucene.search.suggest;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
|
@ -27,22 +28,30 @@ import org.apache.lucene.util.Counter;
|
|||
* This wrapper buffers incoming elements.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
||||
public class BufferingTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
|
||||
// TODO keep this for now
|
||||
/** buffered term entries */
|
||||
protected BytesRefArray entries = new BytesRefArray(Counter.newCounter());
|
||||
/** buffered payload entries */
|
||||
protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter());
|
||||
/** current buffer position */
|
||||
protected int curPos = -1;
|
||||
/** buffered weights, parallel with {@link #entries} */
|
||||
protected long[] freqs = new long[1];
|
||||
private final BytesRef spare = new BytesRef();
|
||||
private final BytesRef payloadSpare = new BytesRef();
|
||||
private final boolean hasPayloads;
|
||||
|
||||
/** Creates a new iterator, buffering entries from the specified iterator */
|
||||
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||
public BufferingTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
|
||||
BytesRef spare;
|
||||
int freqIndex = 0;
|
||||
hasPayloads = source.hasPayloads();
|
||||
while((spare = source.next()) != null) {
|
||||
entries.append(spare);
|
||||
if (hasPayloads) {
|
||||
payloads.append(source.payload());
|
||||
}
|
||||
if (freqIndex >= freqs.length) {
|
||||
freqs = ArrayUtil.grow(freqs, freqs.length+1);
|
||||
}
|
||||
|
@ -64,4 +73,17 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
if (hasPayloads && curPos < payloads.size()) {
|
||||
return payloads.get(payloadSpare, curPos);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return hasPayloads;
|
||||
}
|
||||
}
|
|
@ -47,12 +47,6 @@ import org.apache.lucene.util.BytesRefIterator;
|
|||
* The term, weight and (optionally) payload fields supplied
|
||||
* are required for ALL documents and have to be stored
|
||||
* </li>
|
||||
* <li>
|
||||
* This Dictionary implementation is not compatible with the following Suggesters:
|
||||
* {@link JaspellLookup}, {@link TSTLookup}, {@link FSTCompletionLookup},
|
||||
* {@link WFSTCompletionLookup} and {@link AnalyzingInfixSuggester}.
|
||||
* see https://issues.apache.org/jira/browse/LUCENE-5260
|
||||
* </li>
|
||||
* </ul>
|
||||
*/
|
||||
public class DocumentDictionary implements Dictionary {
|
||||
|
@ -95,7 +89,7 @@ public class DocumentDictionary implements Dictionary {
|
|||
final class TermWeightPayloadIterator implements TermFreqPayloadIterator {
|
||||
private final int docCount;
|
||||
private final Set<String> relevantFields;
|
||||
private final boolean withPayload;
|
||||
private final boolean hasPayloads;
|
||||
private final Bits liveDocs;
|
||||
private int currentDocId = -1;
|
||||
private long currentWeight;
|
||||
|
@ -106,13 +100,13 @@ public class DocumentDictionary implements Dictionary {
|
|||
* index. Setting <code>hasPayloads</code> to false implies an iterator
|
||||
* over only term and weight.
|
||||
*/
|
||||
public TermWeightPayloadIterator(boolean withPayload) throws IOException {
|
||||
public TermWeightPayloadIterator(boolean hasPayloads) throws IOException {
|
||||
docCount = reader.maxDoc() - 1;
|
||||
this.withPayload = withPayload;
|
||||
this.hasPayloads = hasPayloads;
|
||||
currentPayload = null;
|
||||
liveDocs = MultiFields.getLiveDocs(reader);
|
||||
List<String> relevantFieldList;
|
||||
if(withPayload) {
|
||||
if(hasPayloads) {
|
||||
relevantFieldList = Arrays.asList(field, weightField, payloadField);
|
||||
} else {
|
||||
relevantFieldList = Arrays.asList(field, weightField);
|
||||
|
@ -135,7 +129,7 @@ public class DocumentDictionary implements Dictionary {
|
|||
|
||||
StoredDocument doc = reader.document(currentDocId, relevantFields);
|
||||
|
||||
if (withPayload) {
|
||||
if (hasPayloads) {
|
||||
StorableField payload = doc.getField(payloadField);
|
||||
if (payload == null) {
|
||||
throw new IllegalArgumentException(payloadField + " does not exist");
|
||||
|
@ -170,5 +164,10 @@ public class DocumentDictionary implements Dictionary {
|
|||
return currentPayload;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return hasPayloads;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ package org.apache.lucene.search.suggest;
|
|||
import java.io.*;
|
||||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
|
@ -57,11 +57,11 @@ public class FileDictionary implements Dictionary {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TermFreqIterator getWordsIterator() {
|
||||
public TermFreqPayloadIterator getWordsIterator() {
|
||||
return new FileIterator();
|
||||
}
|
||||
|
||||
final class FileIterator implements TermFreqIterator {
|
||||
final class FileIterator implements TermFreqPayloadIterator {
|
||||
private long curFreq;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
|
||||
|
@ -98,5 +98,15 @@ public class FileDictionary implements Dictionary {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
@ -154,25 +154,25 @@ public abstract class Lookup {
|
|||
|
||||
/** Build lookup from a dictionary. Some implementations may require sorted
|
||||
* or unsorted keys from the dictionary's iterator - use
|
||||
* {@link SortedTermFreqIteratorWrapper} or
|
||||
* {@link UnsortedTermFreqIteratorWrapper} in such case.
|
||||
* {@link SortedTermFreqPayloadIteratorWrapper} or
|
||||
* {@link UnsortedTermFreqPayloadIteratorWrapper} in such case.
|
||||
*/
|
||||
public void build(Dictionary dict) throws IOException {
|
||||
BytesRefIterator it = dict.getWordsIterator();
|
||||
TermFreqIterator tfit;
|
||||
if (it instanceof TermFreqIterator) {
|
||||
tfit = (TermFreqIterator)it;
|
||||
TermFreqPayloadIterator tfit;
|
||||
if (it instanceof TermFreqPayloadIterator) {
|
||||
tfit = (TermFreqPayloadIterator)it;
|
||||
} else {
|
||||
tfit = new TermFreqIterator.TermFreqIteratorWrapper(it);
|
||||
tfit = new TermFreqPayloadIterator.TermFreqPayloadIteratorWrapper(it);
|
||||
}
|
||||
build(tfit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}.
|
||||
* Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqPayloadIterator}.
|
||||
* The implementation might re-sort the data internally.
|
||||
*/
|
||||
public abstract void build(TermFreqIterator tfit) throws IOException;
|
||||
public abstract void build(TermFreqPayloadIterator tfit) throws IOException;
|
||||
|
||||
/**
|
||||
* Look up a key and return possible completion for this key.
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Sort.ByteSequencesReader;
|
||||
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
|
@ -34,23 +34,25 @@ import org.apache.lucene.util.IOUtils;
|
|||
* This wrapper buffers incoming elements and makes sure they are sorted based on given comparator.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
||||
public class SortedTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
|
||||
|
||||
private final TermFreqIterator source;
|
||||
private final TermFreqPayloadIterator source;
|
||||
private File tempInput;
|
||||
private File tempSorted;
|
||||
private final ByteSequencesReader reader;
|
||||
private final Comparator<BytesRef> comparator;
|
||||
private final boolean hasPayloads;
|
||||
private boolean done = false;
|
||||
|
||||
private long weight;
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private BytesRef payload = new BytesRef();
|
||||
|
||||
/**
|
||||
* Creates a new sorted wrapper, using {@link
|
||||
* BytesRef#getUTF8SortedAsUnicodeComparator} for
|
||||
* sorting. */
|
||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||
public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
|
||||
this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
}
|
||||
|
||||
|
@ -58,7 +60,8 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
* Creates a new sorted wrapper, sorting by BytesRef
|
||||
* (ascending) then cost (ascending).
|
||||
*/
|
||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
|
||||
public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source, Comparator<BytesRef> comparator) throws IOException {
|
||||
this.hasPayloads = source.hasPayloads();
|
||||
this.source = source;
|
||||
this.comparator = comparator;
|
||||
this.reader = sort();
|
||||
|
@ -74,6 +77,9 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
ByteArrayDataInput input = new ByteArrayDataInput();
|
||||
if (reader.read(scratch)) {
|
||||
weight = decode(scratch, input);
|
||||
if (hasPayloads) {
|
||||
payload = decodePayload(scratch, input);
|
||||
}
|
||||
success = true;
|
||||
return scratch;
|
||||
}
|
||||
|
@ -93,6 +99,19 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
return weight;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
if (hasPayloads) {
|
||||
return payload;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return hasPayloads;
|
||||
}
|
||||
|
||||
/** Sorts by BytesRef (ascending) then cost (ascending). */
|
||||
private final Comparator<BytesRef> tieBreakByCostComparator = new Comparator<BytesRef>() {
|
||||
|
||||
|
@ -111,6 +130,10 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
rightScratch.length = right.length;
|
||||
long leftCost = decode(leftScratch, input);
|
||||
long rightCost = decode(rightScratch, input);
|
||||
if (hasPayloads) {
|
||||
decodePayload(leftScratch, input);
|
||||
decodePayload(rightScratch, input);
|
||||
}
|
||||
int cmp = comparator.compare(leftScratch, rightScratch);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
|
@ -133,7 +156,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
|
||||
|
||||
while ((spare = source.next()) != null) {
|
||||
encode(writer, output, buffer, spare, source.weight());
|
||||
encode(writer, output, buffer, spare, source.payload(), source.weight());
|
||||
}
|
||||
writer.close();
|
||||
new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted);
|
||||
|
@ -164,13 +187,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
}
|
||||
}
|
||||
|
||||
/** encodes an entry (bytes+weight) to the provided writer */
|
||||
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
|
||||
if (spare.length + 8 >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, spare.length + 8);
|
||||
/** encodes an entry (bytes+(payload)+weight) to the provided writer */
|
||||
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
|
||||
int requiredLength = spare.length + 8 + ((hasPayloads) ? 2 + payload.length : 0);
|
||||
if (requiredLength >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, requiredLength);
|
||||
}
|
||||
output.reset(buffer);
|
||||
output.writeBytes(spare.bytes, spare.offset, spare.length);
|
||||
if (hasPayloads) {
|
||||
output.writeBytes(payload.bytes, payload.offset, payload.length);
|
||||
output.writeShort((short) payload.length);
|
||||
}
|
||||
output.writeLong(weight);
|
||||
writer.write(buffer, 0, output.getPosition());
|
||||
}
|
||||
|
@ -182,4 +210,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
scratch.length -= 8; // long
|
||||
return tmpInput.readLong();
|
||||
}
|
||||
|
||||
/** decodes the payload at the current position */
|
||||
protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
|
||||
tmpInput.reset(scratch.bytes);
|
||||
tmpInput.skipBytes(scratch.length - 2); // skip to payload size
|
||||
short payloadLength = tmpInput.readShort(); // read payload size
|
||||
tmpInput.setPosition(scratch.length - 2 - payloadLength); // setPosition to start of payload
|
||||
BytesRef payloadScratch = new BytesRef(payloadLength);
|
||||
tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload
|
||||
payloadScratch.length = payloadLength;
|
||||
scratch.length -= 2; // payload length info (short)
|
||||
scratch.length -= payloadLength; // payload
|
||||
return payloadScratch;
|
||||
}
|
||||
}
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.search.suggest;
|
|||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -28,16 +28,17 @@ import org.apache.lucene.util.BytesRef;
|
|||
* random order.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
|
||||
public class UnsortedTermFreqPayloadIteratorWrapper extends BufferingTermFreqPayloadIteratorWrapper {
|
||||
// TODO keep this for now
|
||||
private final int[] ords;
|
||||
private int currentOrd = -1;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
private final BytesRef payloadSpare = new BytesRef();
|
||||
/**
|
||||
* Creates a new iterator, wrapping the specified iterator and
|
||||
* returning elements in a random order.
|
||||
*/
|
||||
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||
public UnsortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
|
||||
super(source);
|
||||
ords = new int[entries.size()];
|
||||
Random random = new Random();
|
||||
|
@ -54,13 +55,24 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
|
|||
|
||||
@Override
|
||||
public long weight() {
|
||||
assert currentOrd == ords[curPos];
|
||||
return freqs[currentOrd];
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (++curPos < entries.size()) {
|
||||
return entries.get(spare, (currentOrd = ords[curPos]));
|
||||
currentOrd = ords[curPos];
|
||||
return entries.get(spare, currentOrd);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
if (hasPayloads() && curPos < payloads.size()) {
|
||||
assert currentOrd == ords[curPos];
|
||||
return payloads.get(payloadSpare, currentOrd);
|
||||
}
|
||||
return null;
|
||||
}
|
|
@ -65,7 +65,6 @@ import org.apache.lucene.search.ScoreDoc;
|
|||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
|
@ -176,19 +175,14 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator iter) throws IOException {
|
||||
public void build(TermFreqPayloadIterator iter) throws IOException {
|
||||
|
||||
if (searcher != null) {
|
||||
searcher.getIndexReader().close();
|
||||
searcher = null;
|
||||
}
|
||||
|
||||
TermFreqPayloadIterator payloads;
|
||||
if (iter instanceof TermFreqPayloadIterator) {
|
||||
payloads = (TermFreqPayloadIterator) iter;
|
||||
} else {
|
||||
payloads = null;
|
||||
}
|
||||
|
||||
Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));
|
||||
|
||||
IndexWriter w = null;
|
||||
|
@ -236,7 +230,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
doc.add(weightField);
|
||||
|
||||
Field payloadField;
|
||||
if (payloads != null) {
|
||||
if (iter.hasPayloads()) {
|
||||
payloadField = new BinaryDocValuesField("payloads", new BytesRef());
|
||||
doc.add(payloadField);
|
||||
} else {
|
||||
|
@ -250,8 +244,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
textGramField.setStringValue(textString);
|
||||
textDVField.setBytesValue(text);
|
||||
weightField.setLongValue(iter.weight());
|
||||
if (payloads != null) {
|
||||
payloadField.setBytesValue(payloads.payload());
|
||||
if (iter.hasPayloads()) {
|
||||
payloadField.setBytesValue(iter.payload());
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
|
|
@ -31,7 +31,6 @@ import java.util.Set;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.TokenStreamToAutomaton;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.Sort;
|
||||
|
@ -381,19 +380,13 @@ public class AnalyzingSuggester extends Lookup {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator iterator) throws IOException {
|
||||
public void build(TermFreqPayloadIterator iterator) throws IOException {
|
||||
String prefix = getClass().getSimpleName();
|
||||
File directory = Sort.defaultTempDir();
|
||||
File tempInput = File.createTempFile(prefix, ".input", directory);
|
||||
File tempSorted = File.createTempFile(prefix, ".sorted", directory);
|
||||
|
||||
TermFreqPayloadIterator payloads;
|
||||
if (iterator instanceof TermFreqPayloadIterator) {
|
||||
payloads = (TermFreqPayloadIterator) iterator;
|
||||
} else {
|
||||
payloads = null;
|
||||
}
|
||||
hasPayloads = payloads != null;
|
||||
hasPayloads = iterator.hasPayloads();
|
||||
|
||||
Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
|
||||
Sort.ByteSequencesReader reader = null;
|
||||
|
@ -432,7 +425,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
if (surfaceForm.length > (Short.MAX_VALUE-2)) {
|
||||
throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
|
||||
}
|
||||
payload = payloads.payload();
|
||||
payload = iterator.payload();
|
||||
// payload + surfaceLength (short)
|
||||
requiredLength += payload.length + 2;
|
||||
} else {
|
||||
|
@ -470,7 +463,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
writer.close();
|
||||
|
||||
// Sort all input/output pairs (required by FST.Builder):
|
||||
new Sort(new AnalyzingComparator(payloads != null)).sort(tempInput, tempSorted);
|
||||
new Sort(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);
|
||||
|
||||
// Free disk space:
|
||||
tempInput.delete();
|
||||
|
|
|
@ -54,7 +54,6 @@ import org.apache.lucene.index.IndexWriterConfig;
|
|||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.Sort;
|
||||
|
@ -274,15 +273,15 @@ public class FreeTextSuggester extends Lookup {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator iterator) throws IOException {
|
||||
public void build(TermFreqPayloadIterator iterator) throws IOException {
|
||||
build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
|
||||
}
|
||||
|
||||
/** Build the suggest index, using up to the specified
|
||||
* amount of temporary RAM while building. Note that
|
||||
* the weights for the suggestions are ignored. */
|
||||
public void build(TermFreqIterator iterator, double ramBufferSizeMB) throws IOException {
|
||||
if (iterator instanceof TermFreqPayloadIterator) {
|
||||
public void build(TermFreqPayloadIterator iterator, double ramBufferSizeMB) throws IOException {
|
||||
if (iterator.hasPayloads()) {
|
||||
throw new IllegalArgumentException("payloads are not supported");
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.io.OutputStream;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.Sort.SortInfo;
|
||||
|
@ -43,7 +42,7 @@ import org.apache.lucene.util.fst.NoOutputs;
|
|||
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
|
||||
*
|
||||
* <p>This adapter differs from {@link FSTCompletion} in that it attempts
|
||||
* to discretize any "weights" as passed in from {@link TermFreqIterator#weight()}
|
||||
* to discretize any "weights" as passed in from {@link TermFreqPayloadIterator#weight()}
|
||||
* to match the number of buckets. For the rationale for bucketing, see
|
||||
* {@link FSTCompletion}.
|
||||
*
|
||||
|
@ -96,7 +95,7 @@ public class FSTCompletionLookup extends Lookup {
|
|||
|
||||
/**
|
||||
* This constructor prepares for creating a suggested FST using the
|
||||
* {@link #build(TermFreqIterator)} method. The number of weight
|
||||
* {@link #build(TermFreqPayloadIterator)} method. The number of weight
|
||||
* discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and
|
||||
* exact matches are promoted to the top of the suggestions list.
|
||||
*/
|
||||
|
@ -106,7 +105,7 @@ public class FSTCompletionLookup extends Lookup {
|
|||
|
||||
/**
|
||||
* This constructor prepares for creating a suggested FST using the
|
||||
* {@link #build(TermFreqIterator)} method.
|
||||
* {@link #build(TermFreqPayloadIterator)} method.
|
||||
*
|
||||
* @param buckets
|
||||
* The number of weight discretization buckets (see
|
||||
|
@ -141,8 +140,8 @@ public class FSTCompletionLookup extends Lookup {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
if (tfit instanceof TermFreqPayloadIterator) {
|
||||
public void build(TermFreqPayloadIterator tfit) throws IOException {
|
||||
if (tfit.hasPayloads()) {
|
||||
throw new IllegalArgumentException("this suggester doesn't support payloads");
|
||||
}
|
||||
File tempInput = File.createTempFile(
|
||||
|
|
|
@ -25,11 +25,10 @@ import java.util.Collections;
|
|||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
|
||||
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
|
||||
import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
|
@ -93,12 +92,12 @@ public class WFSTCompletionLookup extends Lookup {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator iterator) throws IOException {
|
||||
if (iterator instanceof TermFreqPayloadIterator) {
|
||||
public void build(TermFreqPayloadIterator iterator) throws IOException {
|
||||
if (iterator.hasPayloads()) {
|
||||
throw new IllegalArgumentException("this suggester doesn't support payloads");
|
||||
}
|
||||
BytesRef scratch = new BytesRef();
|
||||
TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
|
||||
TermFreqPayloadIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
|
||||
IntsRef scratchInts = new IntsRef();
|
||||
BytesRef previous = null;
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
|
@ -255,14 +254,15 @@ public class WFSTCompletionLookup extends Lookup {
|
|||
return Integer.MAX_VALUE - (int)value;
|
||||
}
|
||||
|
||||
private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqIteratorWrapper {
|
||||
private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqPayloadIteratorWrapper {
|
||||
|
||||
WFSTTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||
WFSTTermFreqIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
|
||||
super(source);
|
||||
assert source.hasPayloads() == false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
|
||||
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
|
||||
if (spare.length + 4 >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, spare.length + 4);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@ import java.io.OutputStream;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
|
||||
|
@ -47,13 +46,13 @@ public class JaspellLookup extends Lookup {
|
|||
|
||||
/**
|
||||
* Creates a new empty trie
|
||||
* @see #build(TermFreqIterator)
|
||||
* @see #build(TermFreqPayloadIterator)
|
||||
* */
|
||||
public JaspellLookup() {}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
if (tfit instanceof TermFreqPayloadIterator) {
|
||||
public void build(TermFreqPayloadIterator tfit) throws IOException {
|
||||
if (tfit.hasPayloads()) {
|
||||
throw new IllegalArgumentException("this suggester doesn't support payloads");
|
||||
}
|
||||
trie = new JaspellTernarySearchTrie();
|
||||
|
|
|
@ -25,10 +25,9 @@ import java.io.OutputStream;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
|
||||
import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -46,19 +45,19 @@ public class TSTLookup extends Lookup {
|
|||
|
||||
/**
|
||||
* Creates a new TSTLookup with an empty Ternary Search Tree.
|
||||
* @see #build(TermFreqIterator)
|
||||
* @see #build(TermFreqPayloadIterator)
|
||||
*/
|
||||
public TSTLookup() {}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
if (tfit instanceof TermFreqPayloadIterator) {
|
||||
public void build(TermFreqPayloadIterator tfit) throws IOException {
|
||||
if (tfit.hasPayloads()) {
|
||||
throw new IllegalArgumentException("this suggester doesn't support payloads");
|
||||
}
|
||||
root = new TernaryTreeNode();
|
||||
|
||||
// make sure it's sorted and the comparator uses UTF16 sort order
|
||||
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
|
||||
tfit = new SortedTermFreqPayloadIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
|
||||
|
||||
ArrayList<String> tokens = new ArrayList<String>();
|
||||
ArrayList<Number> vals = new ArrayList<Number>();
|
||||
|
|
|
@ -72,12 +72,12 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
/**
|
||||
* Input term/weight pairs.
|
||||
*/
|
||||
private static TermFreq [] dictionaryInput;
|
||||
private static TermFreqPayload [] dictionaryInput;
|
||||
|
||||
/**
|
||||
* Benchmark term/weight pairs (randomized order).
|
||||
*/
|
||||
private static List<TermFreq> benchmarkInput;
|
||||
private static List<TermFreqPayload> benchmarkInput;
|
||||
|
||||
/**
|
||||
* Loads terms and frequencies from Wikipedia (cached).
|
||||
|
@ -85,9 +85,9 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
@BeforeClass
|
||||
public static void setup() throws Exception {
|
||||
assert false : "disable assertions before running benchmarks!";
|
||||
List<TermFreq> input = readTop50KWiki();
|
||||
List<TermFreqPayload> input = readTop50KWiki();
|
||||
Collections.shuffle(input, random);
|
||||
LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreq [input.size()]);
|
||||
LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreqPayload [input.size()]);
|
||||
Collections.shuffle(input, random);
|
||||
LookupBenchmarkTest.benchmarkInput = input;
|
||||
}
|
||||
|
@ -97,8 +97,8 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
/**
|
||||
* Collect the multilingual input for benchmarks/ tests.
|
||||
*/
|
||||
public static List<TermFreq> readTop50KWiki() throws Exception {
|
||||
List<TermFreq> input = new ArrayList<TermFreq>();
|
||||
public static List<TermFreqPayload> readTop50KWiki() throws Exception {
|
||||
List<TermFreqPayload> input = new ArrayList<TermFreqPayload>();
|
||||
URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
|
||||
assert resource != null : "Resource missing: Top50KWiki.utf8";
|
||||
|
||||
|
@ -109,7 +109,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
assertTrue("No | separator?: " + line, tab >= 0);
|
||||
int weight = Integer.parseInt(line.substring(tab + 1));
|
||||
String key = line.substring(0, tab);
|
||||
input.add(new TermFreq(key, weight));
|
||||
input.add(new TermFreqPayload(key, weight));
|
||||
}
|
||||
br.close();
|
||||
return input;
|
||||
|
@ -163,7 +163,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
/**
|
||||
* Create {@link Lookup} instance and populate it.
|
||||
*/
|
||||
private Lookup buildLookup(Class<? extends Lookup> cls, TermFreq[] input) throws Exception {
|
||||
private Lookup buildLookup(Class<? extends Lookup> cls, TermFreqPayload[] input) throws Exception {
|
||||
Lookup lookup = null;
|
||||
try {
|
||||
lookup = cls.newInstance();
|
||||
|
@ -176,7 +176,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
lookup = ctor.newInstance(a);
|
||||
}
|
||||
}
|
||||
lookup.build(new TermFreqArrayIterator(input));
|
||||
lookup.build(new TermFreqPayloadArrayIterator(input));
|
||||
return lookup;
|
||||
}
|
||||
|
||||
|
@ -220,7 +220,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
final Lookup lookup = buildLookup(cls, dictionaryInput);
|
||||
|
||||
final List<String> input = new ArrayList<String>(benchmarkInput.size());
|
||||
for (TermFreq tf : benchmarkInput) {
|
||||
for (TermFreqPayload tf : benchmarkInput) {
|
||||
String s = tf.term.utf8ToString();
|
||||
String sub = s.substring(0, Math.min(s.length(),
|
||||
minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1)));
|
||||
|
|
|
@ -65,10 +65,10 @@ public class PersistenceTest extends LuceneTestCase {
|
|||
|
||||
// Add all input keys.
|
||||
Lookup lookup = lookupClass.newInstance();
|
||||
TermFreq[] keys = new TermFreq[this.keys.length];
|
||||
TermFreqPayload[] keys = new TermFreqPayload[this.keys.length];
|
||||
for (int i = 0; i < keys.length; i++)
|
||||
keys[i] = new TermFreq(this.keys[i], i);
|
||||
lookup.build(new TermFreqArrayIterator(keys));
|
||||
keys[i] = new TermFreqPayload(this.keys[i], i);
|
||||
lookup.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
// Store the suggester.
|
||||
File storeDir = TEMP_DIR;
|
||||
|
@ -81,7 +81,7 @@ public class PersistenceTest extends LuceneTestCase {
|
|||
// Assert validity.
|
||||
Random random = random();
|
||||
long previous = Long.MIN_VALUE;
|
||||
for (TermFreq k : keys) {
|
||||
for (TermFreqPayload k : keys) {
|
||||
List<LookupResult> list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1);
|
||||
assertEquals(1, list.size());
|
||||
LookupResult lookupResult = list.get(0);
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
package org.apache.lucene.search.suggest;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public final class TermFreq {
|
||||
public final BytesRef term;
|
||||
public final long v;
|
||||
|
||||
public TermFreq(String term, long v) {
|
||||
this(new BytesRef(term), v);
|
||||
}
|
||||
|
||||
public TermFreq(BytesRef term, long v) {
|
||||
this.term = term;
|
||||
this.v = v;
|
||||
}
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* A {@link TermFreqIterator} over a sequence of {@link TermFreq}s.
|
||||
*/
|
||||
public final class TermFreqArrayIterator implements TermFreqIterator {
|
||||
private final Iterator<TermFreq> i;
|
||||
private TermFreq current;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
|
||||
public TermFreqArrayIterator(Iterator<TermFreq> i) {
|
||||
this.i = i;
|
||||
}
|
||||
|
||||
public TermFreqArrayIterator(TermFreq [] i) {
|
||||
this(Arrays.asList(i));
|
||||
}
|
||||
|
||||
public TermFreqArrayIterator(Iterable<TermFreq> i) {
|
||||
this(i.iterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
public long weight() {
|
||||
return current.v;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
if (i.hasNext()) {
|
||||
current = i.next();
|
||||
spare.copyBytes(current.term);
|
||||
return spare;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -23,14 +23,32 @@ public final class TermFreqPayload {
|
|||
public final BytesRef term;
|
||||
public final long v;
|
||||
public final BytesRef payload;
|
||||
|
||||
public TermFreqPayload(String term, long v, BytesRef payload) {
|
||||
this(new BytesRef(term), v, payload);
|
||||
}
|
||||
public final boolean hasPayloads;
|
||||
|
||||
public TermFreqPayload(BytesRef term, long v, BytesRef payload) {
|
||||
this(term, v, payload, true);
|
||||
}
|
||||
|
||||
public TermFreqPayload(String term, long v, BytesRef payload) {
|
||||
this(new BytesRef(term), v, payload, true);
|
||||
}
|
||||
|
||||
public TermFreqPayload(BytesRef term, long v) {
|
||||
this(term, v, null, false);
|
||||
}
|
||||
|
||||
public TermFreqPayload(String term, long v) {
|
||||
this(new BytesRef(term), v, null, false);
|
||||
}
|
||||
|
||||
public TermFreqPayload(BytesRef term, long v, BytesRef payload, boolean hasPayloads) {
|
||||
this.term = term;
|
||||
this.v = v;
|
||||
this.payload = payload;
|
||||
this.hasPayloads = hasPayloads;
|
||||
}
|
||||
|
||||
public boolean hasPayloads() {
|
||||
return hasPayloads;
|
||||
}
|
||||
}
|
|
@ -20,26 +20,33 @@ package org.apache.lucene.search.suggest;
|
|||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* A {@link TermFreqIterator} over a sequence of {@link TermFreq}s.
|
||||
* A {@link TermFreqPayloadIterator} over a sequence of {@link TermFreqPayload}s.
|
||||
*/
|
||||
public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterator {
|
||||
private final Iterator<TermFreqPayload> i;
|
||||
private final boolean hasPayloads;
|
||||
private boolean first;
|
||||
private TermFreqPayload current;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
|
||||
public TermFreqPayloadArrayIterator(Iterator<TermFreqPayload> i) {
|
||||
this.i = i;
|
||||
if (i.hasNext()) {
|
||||
current = i.next();
|
||||
first = true;
|
||||
this.hasPayloads = current.hasPayloads;
|
||||
} else {
|
||||
this.hasPayloads = false;
|
||||
}
|
||||
}
|
||||
|
||||
public TermFreqPayloadArrayIterator(TermFreqPayload[] i) {
|
||||
this(Arrays.asList(i));
|
||||
}
|
||||
|
||||
public TermFreqPayloadArrayIterator(Iterable<TermFreqPayload> i) {
|
||||
this(i.iterator());
|
||||
}
|
||||
|
@ -51,8 +58,12 @@ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterat
|
|||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
if (i.hasNext()) {
|
||||
if (i.hasNext() || (first && current!=null)) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
current = i.next();
|
||||
}
|
||||
spare.copyBytes(current.term);
|
||||
return spare;
|
||||
}
|
||||
|
@ -63,4 +74,9 @@ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterat
|
|||
public BytesRef payload() {
|
||||
return current.payload;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return hasPayloads;
|
||||
}
|
||||
}
|
|
@ -1,92 +0,0 @@
|
|||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestTermFreqIterator extends LuceneTestCase {
|
||||
public void testEmpty() throws Exception {
|
||||
TermFreqArrayIterator iterator = new TermFreqArrayIterator(new TermFreq[0]);
|
||||
TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
assertNull(wrapper.next());
|
||||
wrapper = new UnsortedTermFreqIteratorWrapper(iterator);
|
||||
assertNull(wrapper.next());
|
||||
}
|
||||
|
||||
public void testTerms() throws Exception {
|
||||
Random random = random();
|
||||
int num = atLeast(10000);
|
||||
|
||||
Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator();
|
||||
TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>(comparator);
|
||||
TermFreq[] unsorted = new TermFreq[num];
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
BytesRef key;
|
||||
do {
|
||||
key = new BytesRef(_TestUtil.randomUnicodeString(random));
|
||||
} while (sorted.containsKey(key));
|
||||
long value = random.nextLong();
|
||||
sorted.put(key, value);
|
||||
unsorted[i] = new TermFreq(key, value);
|
||||
}
|
||||
|
||||
// test the sorted iterator wrapper
|
||||
TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator);
|
||||
Iterator<Map.Entry<BytesRef,Long>> expected = sorted.entrySet().iterator();
|
||||
while (expected.hasNext()) {
|
||||
Map.Entry<BytesRef,Long> entry = expected.next();
|
||||
|
||||
assertEquals(entry.getKey(), wrapper.next());
|
||||
assertEquals(entry.getValue().longValue(), wrapper.weight());
|
||||
}
|
||||
assertNull(wrapper.next());
|
||||
|
||||
// test the unsorted iterator wrapper
|
||||
wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted));
|
||||
TreeMap<BytesRef,Long> actual = new TreeMap<BytesRef,Long>();
|
||||
BytesRef key;
|
||||
while ((key = wrapper.next()) != null) {
|
||||
long value = wrapper.weight();
|
||||
actual.put(BytesRef.deepCopyOf(key), value);
|
||||
}
|
||||
assertEquals(sorted, actual);
|
||||
}
|
||||
|
||||
public static long asLong(BytesRef b) {
|
||||
return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
|
||||
b.offset + 4) & 0xFFFFFFFFL);
|
||||
}
|
||||
|
||||
private static int asIntInternal(BytesRef b, int pos) {
|
||||
return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
|
||||
| ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,124 @@
|
|||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestTermFreqPayloadIterator extends LuceneTestCase {
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
TermFreqPayloadArrayIterator iterator = new TermFreqPayloadArrayIterator(new TermFreqPayload[0]);
|
||||
TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
assertNull(wrapper.next());
|
||||
wrapper = new UnsortedTermFreqPayloadIteratorWrapper(iterator);
|
||||
assertNull(wrapper.next());
|
||||
}
|
||||
|
||||
public void testTerms() throws Exception {
|
||||
Random random = random();
|
||||
int num = atLeast(10000);
|
||||
|
||||
Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator();
|
||||
TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> sorted = new TreeMap<>(comparator);
|
||||
TreeMap<BytesRef, Long> sortedWithoutPayload = new TreeMap<>(comparator);
|
||||
TermFreqPayload[] unsorted = new TermFreqPayload[num];
|
||||
TermFreqPayload[] unsortedWithoutPayload = new TermFreqPayload[num];
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
BytesRef key;
|
||||
BytesRef payload;
|
||||
do {
|
||||
key = new BytesRef(_TestUtil.randomUnicodeString(random));
|
||||
payload = new BytesRef(_TestUtil.randomUnicodeString(random));
|
||||
} while (sorted.containsKey(key));
|
||||
long value = random.nextLong();
|
||||
sortedWithoutPayload.put(key, value);
|
||||
sorted.put(key, new SimpleEntry<>(value, payload));
|
||||
unsorted[i] = new TermFreqPayload(key, value, payload);
|
||||
unsortedWithoutPayload[i] = new TermFreqPayload(key, value);
|
||||
}
|
||||
|
||||
// test the sorted iterator wrapper with payloads
|
||||
TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted), comparator);
|
||||
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator();
|
||||
while (expected.hasNext()) {
|
||||
Map.Entry<BytesRef,SimpleEntry<Long, BytesRef>> entry = expected.next();
|
||||
|
||||
assertEquals(entry.getKey(), wrapper.next());
|
||||
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
|
||||
assertEquals(entry.getValue().getValue(), wrapper.payload());
|
||||
}
|
||||
assertNull(wrapper.next());
|
||||
|
||||
// test the unsorted iterator wrapper with payloads
|
||||
wrapper = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted));
|
||||
TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>();
|
||||
BytesRef key;
|
||||
while ((key = wrapper.next()) != null) {
|
||||
long value = wrapper.weight();
|
||||
BytesRef payload = wrapper.payload();
|
||||
actual.put(BytesRef.deepCopyOf(key), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload)));
|
||||
}
|
||||
assertEquals(sorted, actual);
|
||||
|
||||
// test the sorted iterator wrapper without payloads
|
||||
TermFreqPayloadIterator wrapperWithoutPayload = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload), comparator);
|
||||
Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator();
|
||||
while (expectedWithoutPayload.hasNext()) {
|
||||
Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next();
|
||||
|
||||
assertEquals(entry.getKey(), wrapperWithoutPayload.next());
|
||||
assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
|
||||
assertNull(wrapperWithoutPayload.payload());
|
||||
}
|
||||
assertNull(wrapperWithoutPayload.next());
|
||||
|
||||
// test the unsorted iterator wrapper without payloads
|
||||
wrapperWithoutPayload = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload));
|
||||
TreeMap<BytesRef, Long> actualWithoutPayload = new TreeMap<>();
|
||||
while ((key = wrapperWithoutPayload.next()) != null) {
|
||||
long value = wrapperWithoutPayload.weight();
|
||||
assertNull(wrapperWithoutPayload.payload());
|
||||
actualWithoutPayload.put(BytesRef.deepCopyOf(key), value);
|
||||
}
|
||||
assertEquals(sortedWithoutPayload, actualWithoutPayload);
|
||||
}
|
||||
|
||||
public static long asLong(BytesRef b) {
|
||||
return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
|
||||
b.offset + 4) & 0xFFFFFFFFL);
|
||||
}
|
||||
|
||||
private static int asIntInternal(BytesRef b, int pos) {
|
||||
return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
|
||||
| ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
|
||||
}
|
||||
}
|
|
@ -52,8 +52,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.TermFreq;
|
||||
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
|
||||
import org.apache.lucene.search.suggest.TermFreqPayload;
|
||||
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -65,18 +63,18 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
|
||||
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
|
||||
public void testKeyword() throws Exception {
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("foo", 50),
|
||||
new TermFreq("bar", 10),
|
||||
new TermFreq("barbar", 10),
|
||||
new TermFreq("barbar", 12),
|
||||
new TermFreq("barbara", 6),
|
||||
new TermFreq("bar", 5),
|
||||
new TermFreq("barbara", 1)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("foo", 50),
|
||||
new TermFreqPayload("bar", 10),
|
||||
new TermFreqPayload("barbar", 10),
|
||||
new TermFreqPayload("barbar", 12),
|
||||
new TermFreqPayload("barbara", 6),
|
||||
new TermFreqPayload("bar", 5),
|
||||
new TermFreqPayload("barbara", 1)
|
||||
);
|
||||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
// top N of 2, but only foo is available
|
||||
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2);
|
||||
|
@ -165,14 +163,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
public void testRandomRealisticKeys() throws IOException {
|
||||
LineFileDocs lineFile = new LineFileDocs(random());
|
||||
Map<String, Long> mapping = new HashMap<>();
|
||||
List<TermFreq> keys = new ArrayList<>();
|
||||
List<TermFreqPayload> keys = new ArrayList<>();
|
||||
|
||||
int howMany = atLeast(100); // this might bring up duplicates
|
||||
for (int i = 0; i < howMany; i++) {
|
||||
Document nextDoc = lineFile.nextDoc();
|
||||
String title = nextDoc.getField("title").stringValue();
|
||||
int randomWeight = random().nextInt(100);
|
||||
keys.add(new TermFreq(title, randomWeight));
|
||||
keys.add(new TermFreqPayload(title, randomWeight));
|
||||
if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) {
|
||||
mapping.put(title, Long.valueOf(randomWeight));
|
||||
}
|
||||
|
@ -183,15 +181,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
boolean doPayloads = random().nextBoolean();
|
||||
if (doPayloads) {
|
||||
List<TermFreqPayload> keysAndPayloads = new ArrayList<>();
|
||||
for (TermFreq termFreq : keys) {
|
||||
for (TermFreqPayload termFreq : keys) {
|
||||
keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
|
||||
}
|
||||
analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads));
|
||||
} else {
|
||||
analyzingSuggester.build(new TermFreqArrayIterator(keys));
|
||||
analyzingSuggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
}
|
||||
|
||||
for (TermFreq termFreq : keys) {
|
||||
for (TermFreqPayload termFreq : keys) {
|
||||
List<LookupResult> lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
|
||||
for (LookupResult lookupResult : lookup) {
|
||||
assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value));
|
||||
|
@ -211,14 +209,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
* basic "standardanalyzer" test with stopword removal
|
||||
*/
|
||||
public void testStandard() throws Exception {
|
||||
TermFreq keys[] = new TermFreq[] {
|
||||
new TermFreq("the ghost of christmas past", 50),
|
||||
TermFreqPayload keys[] = new TermFreqPayload[] {
|
||||
new TermFreqPayload("the ghost of christmas past", 50),
|
||||
};
|
||||
|
||||
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
|
||||
suggester.setPreservePositionIncrements(false);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
|
||||
assertEquals(1, results.size());
|
||||
|
@ -241,23 +239,23 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
public void testEmpty() throws Exception {
|
||||
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
|
||||
|
||||
List<LookupResult> result = suggester.lookup("a", false, 20);
|
||||
assertTrue(result.isEmpty());
|
||||
}
|
||||
|
||||
public void testNoSeps() throws Exception {
|
||||
TermFreq[] keys = new TermFreq[] {
|
||||
new TermFreq("ab cd", 0),
|
||||
new TermFreq("abcd", 1),
|
||||
TermFreqPayload[] keys = new TermFreqPayload[] {
|
||||
new TermFreqPayload("ab cd", 0),
|
||||
new TermFreqPayload("abcd", 1),
|
||||
};
|
||||
|
||||
int options = 0;
|
||||
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
// TODO: would be nice if "ab " would allow the test to
|
||||
// pass, and more generally if the analyzer can know
|
||||
// that the user's current query has ended at a word,
|
||||
|
@ -318,13 +316,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
TermFreq keys[] = new TermFreq[] {
|
||||
new TermFreq("wifi network is slow", 50),
|
||||
new TermFreq("wi fi network is fast", 10),
|
||||
TermFreqPayload keys[] = new TermFreqPayload[] {
|
||||
new TermFreqPayload("wifi network is slow", 50),
|
||||
new TermFreqPayload("wi fi network is fast", 10),
|
||||
};
|
||||
//AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
|
||||
if (VERBOSE) {
|
||||
System.out.println("Results: " + results);
|
||||
|
@ -384,12 +382,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
TermFreq keys[] = new TermFreq[] {
|
||||
new TermFreq("ab xc", 50),
|
||||
new TermFreq("ba xd", 50),
|
||||
TermFreqPayload keys[] = new TermFreqPayload[] {
|
||||
new TermFreqPayload("ab xc", 50),
|
||||
new TermFreqPayload("ba xd", 50),
|
||||
};
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
List<LookupResult> results = suggester.lookup("ab x", false, 1);
|
||||
assertTrue(results.size() == 1);
|
||||
}
|
||||
|
@ -462,11 +460,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
|
||||
Analyzer a = getUnusualAnalyzer();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1);
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("x y", 1),
|
||||
new TermFreq("x y z", 3),
|
||||
new TermFreq("x", 2),
|
||||
new TermFreq("z z z", 20),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("x y", 1),
|
||||
new TermFreqPayload("x y z", 3),
|
||||
new TermFreqPayload("x", 2),
|
||||
new TermFreqPayload("z z z", 20),
|
||||
}));
|
||||
|
||||
//System.out.println("ALL: " + suggester.lookup("x y", false, 6));
|
||||
|
@ -502,11 +500,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = getUnusualAnalyzer();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("x y", 1),
|
||||
new TermFreq("x y z", 3),
|
||||
new TermFreq("x", 2),
|
||||
new TermFreq("z z z", 20),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("x y", 1),
|
||||
new TermFreqPayload("x y z", 3),
|
||||
new TermFreqPayload("x", 2),
|
||||
new TermFreqPayload("z z z", 20),
|
||||
}));
|
||||
|
||||
for(int topN=1;topN<6;topN++) {
|
||||
|
@ -657,12 +655,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
|
||||
boolean doPayloads = random().nextBoolean();
|
||||
|
||||
TermFreq[] keys = null;
|
||||
TermFreqPayload[] keys = null;
|
||||
TermFreqPayload[] payloadKeys = null;
|
||||
if (doPayloads) {
|
||||
payloadKeys = new TermFreqPayload[numQueries];
|
||||
} else {
|
||||
keys = new TermFreq[numQueries];
|
||||
keys = new TermFreqPayload[numQueries];
|
||||
}
|
||||
|
||||
boolean preserveSep = random().nextBoolean();
|
||||
|
@ -735,7 +733,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
payload = new BytesRef(bytes);
|
||||
payloadKeys[i] = new TermFreqPayload(key, weight, payload);
|
||||
} else {
|
||||
keys[i] = new TermFreq(key, weight);
|
||||
keys[i] = new TermFreqPayload(key, weight);
|
||||
payload = null;
|
||||
}
|
||||
|
||||
|
@ -758,7 +756,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
if (doPayloads) {
|
||||
suggester.build(new TermFreqPayloadArrayIterator(shuffle(payloadKeys)));
|
||||
} else {
|
||||
suggester.build(new TermFreqArrayIterator(shuffle(keys)));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(shuffle(keys)));
|
||||
}
|
||||
|
||||
for (String prefix : allPrefixes) {
|
||||
|
@ -876,8 +874,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1);
|
||||
suggester.build(new TermFreqArrayIterator(shuffle(new TermFreq("a", 40),
|
||||
new TermFreq("a ", 50), new TermFreq(" a", 60))));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(shuffle(new TermFreqPayload("a", 40),
|
||||
new TermFreqPayload("a ", 50), new TermFreqPayload(" a", 60))));
|
||||
|
||||
List<LookupResult> results = suggester.lookup("a", false, 5);
|
||||
assertEquals(2, results.size());
|
||||
|
@ -891,11 +889,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("a", 2),
|
||||
new TermFreq("a b c", 3),
|
||||
new TermFreq("a c a", 1),
|
||||
new TermFreq("a c b", 1),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("a", 2),
|
||||
new TermFreqPayload("a b c", 3),
|
||||
new TermFreqPayload("a c a", 1),
|
||||
new TermFreqPayload("a c b", 1),
|
||||
}));
|
||||
|
||||
suggester.lookup("a", false, 4);
|
||||
|
@ -907,10 +905,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("a", 5),
|
||||
new TermFreq("a b", 3),
|
||||
new TermFreq("a c", 4),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("a", 5),
|
||||
new TermFreqPayload("a b", 3),
|
||||
new TermFreqPayload("a c", 4),
|
||||
}));
|
||||
|
||||
List<LookupResult> results = suggester.lookup("a", false, 3);
|
||||
|
@ -972,9 +970,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(shuffle(
|
||||
new TermFreq("hambone", 6),
|
||||
new TermFreq("nellie", 5))));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(shuffle(
|
||||
new TermFreqPayload("hambone", 6),
|
||||
new TermFreqPayload("nellie", 5))));
|
||||
|
||||
List<LookupResult> results = suggester.lookup("nellie", false, 2);
|
||||
assertEquals(2, results.size());
|
||||
|
@ -1041,9 +1039,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("a", 6),
|
||||
new TermFreq("b", 5),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("a", 6),
|
||||
new TermFreqPayload("b", 5),
|
||||
}));
|
||||
|
||||
List<LookupResult> results = suggester.lookup("a", false, 2);
|
||||
|
@ -1114,21 +1112,21 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("a a", 50),
|
||||
new TermFreq("a b", 50),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("a a", 50),
|
||||
new TermFreqPayload("a b", 50),
|
||||
}));
|
||||
}
|
||||
|
||||
public void testDupSurfaceFormsMissingResults3() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("a a", 7),
|
||||
new TermFreq("a a", 7),
|
||||
new TermFreq("a c", 6),
|
||||
new TermFreq("a c", 3),
|
||||
new TermFreq("a b", 5),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("a a", 7),
|
||||
new TermFreqPayload("a a", 7),
|
||||
new TermFreqPayload("a c", 6),
|
||||
new TermFreqPayload("a c", 3),
|
||||
new TermFreqPayload("a b", 5),
|
||||
}));
|
||||
assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString());
|
||||
}
|
||||
|
@ -1136,9 +1134,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
public void testEndingSpace() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("i love lucy", 7),
|
||||
new TermFreq("isla de muerta", 8),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("i love lucy", 7),
|
||||
new TermFreqPayload("isla de muerta", 8),
|
||||
}));
|
||||
assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString());
|
||||
assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString());
|
||||
|
@ -1169,15 +1167,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
};
|
||||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1);
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {new TermFreq("a", 1)}));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {new TermFreqPayload("a", 1)}));
|
||||
assertEquals("[a/1]", suggester.lookup("a", false, 1).toString());
|
||||
}
|
||||
|
||||
public void testIllegalLookupArgument() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("а где Люси?", 7),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("а где Люси?", 7),
|
||||
}));
|
||||
try {
|
||||
suggester.lookup("а\u001E", false, 3);
|
||||
|
|
|
@ -41,8 +41,8 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.TermFreq;
|
||||
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
|
||||
import org.apache.lucene.search.suggest.TermFreqPayload;
|
||||
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -54,16 +54,16 @@ import org.apache.lucene.util.fst.Util;
|
|||
public class FuzzySuggesterTest extends LuceneTestCase {
|
||||
|
||||
public void testRandomEdits() throws IOException {
|
||||
List<TermFreq> keys = new ArrayList<TermFreq>();
|
||||
List<TermFreqPayload> keys = new ArrayList<TermFreqPayload>();
|
||||
int numTerms = atLeast(100);
|
||||
for (int i = 0; i < numTerms; i++) {
|
||||
keys.add(new TermFreq("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
|
||||
keys.add(new TermFreqPayload("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
|
||||
}
|
||||
keys.add(new TermFreq("foo bar boo far", 12));
|
||||
keys.add(new TermFreqPayload("foo bar boo far", 12));
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
|
||||
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
int numIters = atLeast(10);
|
||||
for (int i = 0; i < numIters; i++) {
|
||||
String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
|
||||
|
@ -75,16 +75,16 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testNonLatinRandomEdits() throws IOException {
|
||||
List<TermFreq> keys = new ArrayList<TermFreq>();
|
||||
List<TermFreqPayload> keys = new ArrayList<TermFreqPayload>();
|
||||
int numTerms = atLeast(100);
|
||||
for (int i = 0; i < numTerms; i++) {
|
||||
keys.add(new TermFreq("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
|
||||
keys.add(new TermFreqPayload("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
|
||||
}
|
||||
keys.add(new TermFreq("фуу бар буу фар", 12));
|
||||
keys.add(new TermFreqPayload("фуу бар буу фар", 12));
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
|
||||
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
int numIters = atLeast(10);
|
||||
for (int i = 0; i < numIters; i++) {
|
||||
String addRandomEdit = addRandomEdit("фуу бар буу", 0);
|
||||
|
@ -97,15 +97,15 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
|
||||
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
|
||||
public void testKeyword() throws Exception {
|
||||
TermFreq keys[] = new TermFreq[] {
|
||||
new TermFreq("foo", 50),
|
||||
new TermFreq("bar", 10),
|
||||
new TermFreq("barbar", 12),
|
||||
new TermFreq("barbara", 6)
|
||||
TermFreqPayload keys[] = new TermFreqPayload[] {
|
||||
new TermFreqPayload("foo", 50),
|
||||
new TermFreqPayload("bar", 10),
|
||||
new TermFreqPayload("barbar", 12),
|
||||
new TermFreqPayload("barbara", 6)
|
||||
};
|
||||
|
||||
FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("bariar", random()), false, 2);
|
||||
assertEquals(2, results.size());
|
||||
|
@ -172,14 +172,14 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
* basic "standardanalyzer" test with stopword removal
|
||||
*/
|
||||
public void testStandard() throws Exception {
|
||||
TermFreq keys[] = new TermFreq[] {
|
||||
new TermFreq("the ghost of christmas past", 50),
|
||||
TermFreqPayload keys[] = new TermFreqPayload[] {
|
||||
new TermFreqPayload("the ghost of christmas past", 50),
|
||||
};
|
||||
|
||||
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||
FuzzySuggester suggester = new FuzzySuggester(standard);
|
||||
suggester.setPreservePositionIncrements(false);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
|
||||
assertEquals(1, results.size());
|
||||
|
@ -200,16 +200,16 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testNoSeps() throws Exception {
|
||||
TermFreq[] keys = new TermFreq[] {
|
||||
new TermFreq("ab cd", 0),
|
||||
new TermFreq("abcd", 1),
|
||||
TermFreqPayload[] keys = new TermFreqPayload[] {
|
||||
new TermFreqPayload("ab cd", 0),
|
||||
new TermFreqPayload("abcd", 1),
|
||||
};
|
||||
|
||||
int options = 0;
|
||||
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3, false);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
// TODO: would be nice if "ab " would allow the test to
|
||||
// pass, and more generally if the analyzer can know
|
||||
// that the user's current query has ended at a word,
|
||||
|
@ -270,12 +270,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
TermFreq keys[] = new TermFreq[] {
|
||||
new TermFreq("wifi network is slow", 50),
|
||||
new TermFreq("wi fi network is fast", 10),
|
||||
TermFreqPayload keys[] = new TermFreqPayload[] {
|
||||
new TermFreqPayload("wifi network is slow", 50),
|
||||
new TermFreqPayload("wi fi network is fast", 10),
|
||||
};
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
|
||||
if (VERBOSE) {
|
||||
|
@ -290,7 +290,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
|
||||
public void testEmpty() throws Exception {
|
||||
FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
|
||||
|
||||
List<LookupResult> result = suggester.lookup("a", false, 20);
|
||||
assertTrue(result.isEmpty());
|
||||
|
@ -344,12 +344,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
TermFreq keys[] = new TermFreq[] {
|
||||
new TermFreq("ab xc", 50),
|
||||
new TermFreq("ba xd", 50),
|
||||
TermFreqPayload keys[] = new TermFreqPayload[] {
|
||||
new TermFreqPayload("ab xc", 50),
|
||||
new TermFreqPayload("ba xd", 50),
|
||||
};
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
List<LookupResult> results = suggester.lookup("ab x", false, 1);
|
||||
assertTrue(results.size() == 1);
|
||||
}
|
||||
|
@ -418,11 +418,11 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
|
||||
Analyzer a = getUnusualAnalyzer();
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("x y", 1),
|
||||
new TermFreq("x y z", 3),
|
||||
new TermFreq("x", 2),
|
||||
new TermFreq("z z z", 20),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("x y", 1),
|
||||
new TermFreqPayload("x y z", 3),
|
||||
new TermFreqPayload("x", 2),
|
||||
new TermFreqPayload("z z z", 20),
|
||||
}));
|
||||
|
||||
//System.out.println("ALL: " + suggester.lookup("x y", false, 6));
|
||||
|
@ -458,11 +458,11 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = getUnusualAnalyzer();
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("x y", 1),
|
||||
new TermFreq("x y z", 3),
|
||||
new TermFreq("x", 2),
|
||||
new TermFreq("z z z", 20),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("x y", 1),
|
||||
new TermFreqPayload("x y z", 3),
|
||||
new TermFreqPayload("x", 2),
|
||||
new TermFreqPayload("z z z", 20),
|
||||
}));
|
||||
|
||||
for(int topN=1;topN<6;topN++) {
|
||||
|
@ -491,19 +491,19 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// Holds surface form separately:
|
||||
private static class TermFreq2 implements Comparable<TermFreq2> {
|
||||
private static class TermFreqPayload2 implements Comparable<TermFreqPayload2> {
|
||||
public final String surfaceForm;
|
||||
public final String analyzedForm;
|
||||
public final long weight;
|
||||
|
||||
public TermFreq2(String surfaceForm, String analyzedForm, long weight) {
|
||||
public TermFreqPayload2(String surfaceForm, String analyzedForm, long weight) {
|
||||
this.surfaceForm = surfaceForm;
|
||||
this.analyzedForm = analyzedForm;
|
||||
this.weight = weight;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(TermFreq2 other) {
|
||||
public int compareTo(TermFreqPayload2 other) {
|
||||
int cmp = analyzedForm.compareTo(other.analyzedForm);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
|
@ -596,11 +596,11 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
|
||||
int numQueries = atLeast(100);
|
||||
|
||||
final List<TermFreq2> slowCompletor = new ArrayList<TermFreq2>();
|
||||
final List<TermFreqPayload2> slowCompletor = new ArrayList<TermFreqPayload2>();
|
||||
final TreeSet<String> allPrefixes = new TreeSet<String>();
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
|
||||
TermFreq[] keys = new TermFreq[numQueries];
|
||||
TermFreqPayload[] keys = new TermFreqPayload[numQueries];
|
||||
|
||||
boolean preserveSep = random().nextBoolean();
|
||||
boolean unicodeAware = random().nextBoolean();
|
||||
|
@ -666,17 +666,17 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
// we can probably do Integer.MAX_VALUE here, but why worry.
|
||||
int weight = random().nextInt(1<<24);
|
||||
keys[i] = new TermFreq(key, weight);
|
||||
keys[i] = new TermFreqPayload(key, weight);
|
||||
|
||||
slowCompletor.add(new TermFreq2(key, analyzedKey, weight));
|
||||
slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight));
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
// Don't just sort original list, to avoid VERBOSE
|
||||
// altering the test:
|
||||
List<TermFreq2> sorted = new ArrayList<TermFreq2>(slowCompletor);
|
||||
List<TermFreqPayload2> sorted = new ArrayList<TermFreqPayload2>(slowCompletor);
|
||||
Collections.sort(sorted);
|
||||
for(TermFreq2 ent : sorted) {
|
||||
for(TermFreqPayload2 ent : sorted) {
|
||||
System.out.println(" surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
|
||||
}
|
||||
}
|
||||
|
@ -684,7 +684,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a,
|
||||
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, 1, false, 1, 3, unicodeAware);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
for (String prefix : allPrefixes) {
|
||||
|
||||
|
@ -756,7 +756,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertTrue(automaton.isDeterministic());
|
||||
// TODO: could be faster... but its slowCompletor for a reason
|
||||
BytesRef spare = new BytesRef();
|
||||
for (TermFreq2 e : slowCompletor) {
|
||||
for (TermFreqPayload2 e : slowCompletor) {
|
||||
spare.copyChars(e.analyzedForm);
|
||||
Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
|
||||
for (IntsRef intsRef : finiteStrings) {
|
||||
|
@ -825,14 +825,14 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = new MockAnalyzer(random());
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, 1, true, 1, 3, false);
|
||||
|
||||
List<TermFreq> keys = Arrays.asList(new TermFreq[] {
|
||||
new TermFreq("a", 40),
|
||||
new TermFreq("a ", 50),
|
||||
new TermFreq(" a", 60),
|
||||
List<TermFreqPayload> keys = Arrays.asList(new TermFreqPayload[] {
|
||||
new TermFreqPayload("a", 40),
|
||||
new TermFreqPayload("a ", 50),
|
||||
new TermFreqPayload(" a", 60),
|
||||
});
|
||||
|
||||
Collections.shuffle(keys, random());
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup("a", false, 5);
|
||||
assertEquals(2, results.size());
|
||||
|
@ -846,15 +846,15 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = new MockAnalyzer(random());
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, 2, true, 1, 3, false);
|
||||
|
||||
List<TermFreq> keys = Arrays.asList(new TermFreq[] {
|
||||
new TermFreq("foo bar", 40),
|
||||
new TermFreq("foo bar baz", 50),
|
||||
new TermFreq("barbaz", 60),
|
||||
new TermFreq("barbazfoo", 10),
|
||||
List<TermFreqPayload> keys = Arrays.asList(new TermFreqPayload[] {
|
||||
new TermFreqPayload("foo bar", 40),
|
||||
new TermFreqPayload("foo bar baz", 50),
|
||||
new TermFreqPayload("barbaz", 60),
|
||||
new TermFreqPayload("barbazfoo", 10),
|
||||
});
|
||||
|
||||
Collections.shuffle(keys, random());
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString());
|
||||
assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString());
|
||||
|
@ -929,25 +929,25 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
|
||||
public void testRandom2() throws Throwable {
|
||||
final int NUM = atLeast(200);
|
||||
final List<TermFreq> answers = new ArrayList<TermFreq>();
|
||||
final List<TermFreqPayload> answers = new ArrayList<TermFreqPayload>();
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
for(int i=0;i<NUM;i++) {
|
||||
final String s = randomSimpleString(8);
|
||||
if (!seen.contains(s)) {
|
||||
answers.add(new TermFreq(s, random().nextInt(1000)));
|
||||
answers.add(new TermFreqPayload(s, random().nextInt(1000)));
|
||||
seen.add(s);
|
||||
}
|
||||
}
|
||||
|
||||
Collections.sort(answers, new Comparator<TermFreq>() {
|
||||
Collections.sort(answers, new Comparator<TermFreqPayload>() {
|
||||
@Override
|
||||
public int compare(TermFreq a, TermFreq b) {
|
||||
public int compare(TermFreqPayload a, TermFreqPayload b) {
|
||||
return a.term.compareTo(b.term);
|
||||
}
|
||||
});
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: targets");
|
||||
for(TermFreq tf : answers) {
|
||||
for(TermFreqPayload tf : answers) {
|
||||
System.out.println(" " + tf.term.utf8ToString() + " freq=" + tf.v);
|
||||
}
|
||||
}
|
||||
|
@ -965,7 +965,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
Collections.shuffle(answers, random());
|
||||
suggest.build(new TermFreqArrayIterator(answers.toArray(new TermFreq[answers.size()])));
|
||||
suggest.build(new TermFreqPayloadArrayIterator(answers.toArray(new TermFreqPayload[answers.size()])));
|
||||
|
||||
final int ITERS = atLeast(100);
|
||||
for(int iter=0;iter<ITERS;iter++) {
|
||||
|
@ -1004,10 +1004,10 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<TermFreq> answers, String frag) {
|
||||
private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<TermFreqPayload> answers, String frag) {
|
||||
final List<LookupResult> results = new ArrayList<LookupResult>();
|
||||
final int fragLen = frag.length();
|
||||
for(TermFreq tf : answers) {
|
||||
for(TermFreqPayload tf : answers) {
|
||||
//System.out.println(" check s=" + tf.term.utf8ToString());
|
||||
boolean prefixMatches = true;
|
||||
for(int i=0;i<prefixLen;i++) {
|
||||
|
|
|
@ -41,10 +41,10 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.TermFreq;
|
||||
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
|
||||
import org.apache.lucene.search.suggest.TermFreqPayload;
|
||||
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -54,14 +54,14 @@ import org.junit.Ignore;
|
|||
public class TestFreeTextSuggester extends LuceneTestCase {
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("foo bar baz blah", 50),
|
||||
new TermFreq("boo foo bar foo bee", 20)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("foo bar baz blah", 50),
|
||||
new TermFreqPayload("boo foo bar foo bee", 20)
|
||||
);
|
||||
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
|
||||
sug.build(new TermFreqArrayIterator(keys));
|
||||
sug.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
for(int i=0;i<2;i++) {
|
||||
|
||||
|
@ -101,12 +101,12 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
public void testIllegalByteDuringBuild() throws Exception {
|
||||
// Default separator is INFORMATION SEPARATOR TWO
|
||||
// (0x1e), so no input token is allowed to contain it
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("foo\u001ebar baz", 50)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("foo\u001ebar baz", 50)
|
||||
);
|
||||
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
|
||||
try {
|
||||
sug.build(new TermFreqArrayIterator(keys));
|
||||
sug.build(new TermFreqPayloadArrayIterator(keys));
|
||||
fail("did not hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
|
@ -116,11 +116,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
public void testIllegalByteDuringQuery() throws Exception {
|
||||
// Default separator is INFORMATION SEPARATOR TWO
|
||||
// (0x1e), so no input token is allowed to contain it
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("foo bar baz", 50)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("foo bar baz", 50)
|
||||
);
|
||||
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
|
||||
sug.build(new TermFreqArrayIterator(keys));
|
||||
sug.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
try {
|
||||
sug.lookup("foo\u001eb", 10);
|
||||
|
@ -136,7 +136,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
// Skip header:
|
||||
lfd.nextDoc();
|
||||
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
|
||||
sug.build(new TermFreqIterator() {
|
||||
sug.build(new TermFreqPayloadIterator() {
|
||||
|
||||
private int count;
|
||||
|
||||
|
@ -161,6 +161,16 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
}
|
||||
return new BytesRef(doc.get("body"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
if (VERBOSE) {
|
||||
System.out.println(sug.sizeInBytes() + " bytes");
|
||||
|
@ -175,13 +185,13 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
|
||||
// Make sure you can suggest based only on unigram model:
|
||||
public void testUnigrams() throws Exception {
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("foo bar baz blah boo foo bar foo bee", 50)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("foo bar baz blah boo foo bar foo bee", 50)
|
||||
);
|
||||
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
FreeTextSuggester sug = new FreeTextSuggester(a, a, 1, (byte) 0x20);
|
||||
sug.build(new TermFreqArrayIterator(keys));
|
||||
sug.build(new TermFreqPayloadArrayIterator(keys));
|
||||
// Sorts first by count, descending, second by term, ascending
|
||||
assertEquals("bar/0.22 baz/0.11 bee/0.11 blah/0.11 boo/0.11",
|
||||
toString(sug.lookup("b", 10)));
|
||||
|
@ -189,24 +199,24 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
|
||||
// Make sure the last token is not duplicated
|
||||
public void testNoDupsAcrossGrams() throws Exception {
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("foo bar bar bar bar", 50)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("foo bar bar bar bar", 50)
|
||||
);
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
|
||||
sug.build(new TermFreqArrayIterator(keys));
|
||||
sug.build(new TermFreqPayloadArrayIterator(keys));
|
||||
assertEquals("foo bar/1.00",
|
||||
toString(sug.lookup("foo b", 10)));
|
||||
}
|
||||
|
||||
// Lookup of just empty string produces unicode only matches:
|
||||
public void testEmptyString() throws Exception {
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("foo bar bar bar bar", 50)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("foo bar bar bar bar", 50)
|
||||
);
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
|
||||
sug.build(new TermFreqArrayIterator(keys));
|
||||
sug.build(new TermFreqPayloadArrayIterator(keys));
|
||||
try {
|
||||
sug.lookup("", 10);
|
||||
fail("did not hit exception");
|
||||
|
@ -228,11 +238,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("wizard of oz", 50)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("wizard of oz", 50)
|
||||
);
|
||||
FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
|
||||
sug.build(new TermFreqArrayIterator(keys));
|
||||
sug.build(new TermFreqPayloadArrayIterator(keys));
|
||||
assertEquals("wizard _ oz/1.00",
|
||||
toString(sug.lookup("wizard of", 10)));
|
||||
|
||||
|
@ -256,11 +266,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
Iterable<TermFreq> keys = shuffle(
|
||||
new TermFreq("wizard of of oz", 50)
|
||||
Iterable<TermFreqPayload> keys = shuffle(
|
||||
new TermFreqPayload("wizard of of oz", 50)
|
||||
);
|
||||
FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
|
||||
sug.build(new TermFreqArrayIterator(keys));
|
||||
sug.build(new TermFreqPayloadArrayIterator(keys));
|
||||
assertEquals("",
|
||||
toString(sug.lookup("wizard of of", 10)));
|
||||
}
|
||||
|
@ -320,7 +330,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
|
||||
// Build suggester model:
|
||||
FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte) 0x20);
|
||||
sug.build(new TermFreqIterator() {
|
||||
sug.build(new TermFreqPayloadIterator() {
|
||||
int upto;
|
||||
|
||||
@Override
|
||||
|
@ -342,6 +352,16 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
|||
public long weight() {
|
||||
return random().nextLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
// Build inefficient but hopefully correct model:
|
||||
|
|
|
@ -28,8 +28,8 @@ import org.apache.lucene.util.*;
|
|||
* Unit tests for {@link FSTCompletion}.
|
||||
*/
|
||||
public class FSTCompletionTest extends LuceneTestCase {
|
||||
public static TermFreq tf(String t, int v) {
|
||||
return new TermFreq(t, v);
|
||||
public static TermFreqPayload tf(String t, int v) {
|
||||
return new TermFreqPayload(t, v);
|
||||
}
|
||||
|
||||
private FSTCompletion completion;
|
||||
|
@ -40,15 +40,15 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
super.setUp();
|
||||
|
||||
FSTCompletionBuilder builder = new FSTCompletionBuilder();
|
||||
for (TermFreq tf : evalKeys()) {
|
||||
for (TermFreqPayload tf : evalKeys()) {
|
||||
builder.add(tf.term, (int) tf.v);
|
||||
}
|
||||
completion = builder.build();
|
||||
completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
|
||||
}
|
||||
|
||||
private TermFreq[] evalKeys() {
|
||||
final TermFreq[] keys = new TermFreq[] {
|
||||
private TermFreqPayload[] evalKeys() {
|
||||
final TermFreqPayload[] keys = new TermFreqPayload[] {
|
||||
tf("one", 0),
|
||||
tf("oneness", 1),
|
||||
tf("onerous", 1),
|
||||
|
@ -157,17 +157,17 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);
|
||||
|
||||
Random r = random();
|
||||
List<TermFreq> keys = new ArrayList<TermFreq>();
|
||||
List<TermFreqPayload> keys = new ArrayList<TermFreqPayload>();
|
||||
for (int i = 0; i < 5000; i++) {
|
||||
keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
|
||||
keys.add(new TermFreqPayload(_TestUtil.randomSimpleString(r), -1));
|
||||
}
|
||||
|
||||
lookup.build(new TermFreqArrayIterator(keys));
|
||||
lookup.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
// All the weights were constant, so all returned buckets must be constant, whatever they
|
||||
// are.
|
||||
Long previous = null;
|
||||
for (TermFreq tf : keys) {
|
||||
for (TermFreqPayload tf : keys) {
|
||||
Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue();
|
||||
if (previous != null) {
|
||||
assertEquals(previous, current);
|
||||
|
@ -177,11 +177,11 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testMultilingualInput() throws Exception {
|
||||
List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();
|
||||
List<TermFreqPayload> input = LookupBenchmarkTest.readTop50KWiki();
|
||||
|
||||
FSTCompletionLookup lookup = new FSTCompletionLookup();
|
||||
lookup.build(new TermFreqArrayIterator(input));
|
||||
for (TermFreq tf : input) {
|
||||
lookup.build(new TermFreqPayloadArrayIterator(input));
|
||||
for (TermFreqPayload tf : input) {
|
||||
assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random())));
|
||||
assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString());
|
||||
}
|
||||
|
@ -198,17 +198,17 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testRandom() throws Exception {
|
||||
List<TermFreq> freqs = new ArrayList<TermFreq>();
|
||||
List<TermFreqPayload> freqs = new ArrayList<TermFreqPayload>();
|
||||
Random rnd = random();
|
||||
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
|
||||
int weight = rnd.nextInt(100);
|
||||
freqs.add(new TermFreq("" + rnd.nextLong(), weight));
|
||||
freqs.add(new TermFreqPayload("" + rnd.nextLong(), weight));
|
||||
}
|
||||
|
||||
FSTCompletionLookup lookup = new FSTCompletionLookup();
|
||||
lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));
|
||||
lookup.build(new TermFreqPayloadArrayIterator(freqs.toArray(new TermFreqPayload[freqs.size()])));
|
||||
|
||||
for (TermFreq tf : freqs) {
|
||||
for (TermFreqPayload tf : freqs) {
|
||||
final String term = tf.term.utf8ToString();
|
||||
for (int i = 1; i < term.length(); i++) {
|
||||
String prefix = term.substring(0, i);
|
||||
|
|
|
@ -20,8 +20,8 @@ package org.apache.lucene.search.suggest.fst;
|
|||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.TermFreq;
|
||||
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
|
||||
import org.apache.lucene.search.suggest.TermFreqPayload;
|
||||
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
@ -29,16 +29,16 @@ import org.apache.lucene.util._TestUtil;
|
|||
public class WFSTCompletionTest extends LuceneTestCase {
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
TermFreq keys[] = new TermFreq[] {
|
||||
new TermFreq("foo", 50),
|
||||
new TermFreq("bar", 10),
|
||||
new TermFreq("barbar", 12),
|
||||
new TermFreq("barbara", 6)
|
||||
TermFreqPayload keys[] = new TermFreqPayload[] {
|
||||
new TermFreqPayload("foo", 50),
|
||||
new TermFreqPayload("bar", 10),
|
||||
new TermFreqPayload("barbar", 12),
|
||||
new TermFreqPayload("barbara", 6)
|
||||
};
|
||||
|
||||
Random random = new Random(random().nextLong());
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup();
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
// top N of 2, but only foo is available
|
||||
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
|
||||
|
@ -81,9 +81,9 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("x y", 20),
|
||||
new TermFreq("x", 2),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("x y", 20),
|
||||
new TermFreqPayload("x", 2),
|
||||
}));
|
||||
|
||||
for(int topN=1;topN<4;topN++) {
|
||||
|
@ -105,9 +105,9 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq("x y", 20),
|
||||
new TermFreq("x", 2),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload("x y", 20),
|
||||
new TermFreqPayload("x", 2),
|
||||
}));
|
||||
|
||||
for(int topN=1;topN<4;topN++) {
|
||||
|
@ -131,7 +131,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
final TreeMap<String,Long> slowCompletor = new TreeMap<String,Long>();
|
||||
final TreeSet<String> allPrefixes = new TreeSet<String>();
|
||||
|
||||
TermFreq[] keys = new TermFreq[numWords];
|
||||
TermFreqPayload[] keys = new TermFreqPayload[numWords];
|
||||
|
||||
for (int i = 0; i < numWords; i++) {
|
||||
String s;
|
||||
|
@ -150,11 +150,11 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
// we can probably do Integer.MAX_VALUE here, but why worry.
|
||||
int weight = random().nextInt(1<<24);
|
||||
slowCompletor.put(s, (long)weight);
|
||||
keys[i] = new TermFreq(s, weight);
|
||||
keys[i] = new TermFreqPayload(s, weight);
|
||||
}
|
||||
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||
suggester.build(new TermFreqArrayIterator(keys));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||
|
||||
Random random = new Random(random().nextLong());
|
||||
for (String prefix : allPrefixes) {
|
||||
|
@ -205,16 +205,16 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
|
||||
new TermFreq(key1, 50),
|
||||
new TermFreq(key2, 50),
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
|
||||
new TermFreqPayload(key1, 50),
|
||||
new TermFreqPayload(key2, 50),
|
||||
}));
|
||||
}
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||
|
||||
suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
|
||||
suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
|
||||
List<LookupResult> result = suggester.lookup("a", false, 20);
|
||||
assertTrue(result.isEmpty());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue