LUCENE-3807: Clean up Suggest API

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293148 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2012-02-24 09:49:39 +00:00
parent 61387fe283
commit f29eda768d
40 changed files with 289 additions and 243 deletions

View File

@ -387,7 +387,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
}

View File

@ -654,7 +654,7 @@ public class DocTermOrds {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return termsEnum.getComparator();
}

View File

@ -174,7 +174,7 @@ public class FilterAtomicReader extends AtomicReader {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return in.getComparator();
}

View File

@ -122,7 +122,7 @@ public abstract class FilteredTermsEnum extends TermsEnum {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return tenum.getComparator();
}

View File

@ -179,13 +179,6 @@ public abstract class TermsEnum implements BytesRefIterator {
}
};
}
/** Return the {@link BytesRef} Comparator used to sort
* terms provided by the iterator. This may return
* null if there are no terms. Callers may invoke this
* method many times, so it's best to cache a single
* instance & reuse it. */
public abstract Comparator<BytesRef> getComparator() throws IOException;
/** An empty TermsEnum for quickly returning an empty instance e.g.
* in {@link org.apache.lucene.search.MultiTermQuery}

View File

@ -1052,7 +1052,7 @@ class FieldCacheImpl implements FieldCache {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}

View File

@ -287,7 +287,7 @@ public final class FuzzyTermsEnum extends TermsEnum {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return actualEnum.getComparator();
}

View File

@ -233,13 +233,7 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
final byte[] bBytes = b.bytes;
int bUpto = b.offset;
final int aStop;
if (a.length < b.length) {
aStop = aUpto + a.length;
} else {
aStop = aUpto + b.length;
}
final int aStop = aUpto + Math.min(a.length, b.length);
while(aUpto < aStop) {
int aByte = aBytes[aUpto++] & 0xff;
int bByte = bBytes[bUpto++] & 0xff;

View File

@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
import java.io.IOException;
import java.util.Comparator;
/**
* A simple iterator interface for {@link BytesRef} iteration
@ -40,6 +41,14 @@ public interface BytesRefIterator {
*/
public BytesRef next() throws IOException;
/**
 * Returns the {@link BytesRef} comparator used to sort the terms provided by
 * this iterator.
 * <p>
 * Callers may invoke this method many times, so it's best to cache a single
 * instance and reuse it.
 *
 * @return the comparator defining the order of the terms, or
 *         <code>null</code> if there are no items or the iterator is not
 *         sorted
 */
public Comparator<BytesRef> getComparator();
public final static class EmptyBytesRefIterator implements BytesRefIterator {
@Override
@ -47,6 +56,10 @@ public interface BytesRefIterator {
return null;
}
/**
 * Always returns <code>null</code>; this iterator reports no comparator.
 */
public Comparator<BytesRef> getComparator() {
return null;
}
}
}

View File

@ -26,6 +26,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.lang.reflect.Method;
import java.nio.CharBuffer;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
@ -707,4 +708,23 @@ public class _TestUtil {
}
return termsEnum.docs(liveDocs, null, needsFreqs);
}
/**
 * Turns the given string into a {@link CharSequence} by wrapping it in a new
 * {@link BytesRef} and passing that, together with <code>random</code>, to
 * <code>bytesToCharSequence</code>.
 *
 * @param string the text to convert
 * @param random source of randomness handed on to <code>bytesToCharSequence</code>
 * @return whatever <code>bytesToCharSequence</code> returns for the wrapped string
 */
public static CharSequence stringToCharSequence(String string, Random random) {
return bytesToCharSequence(new BytesRef(string), random);
}
/**
 * Converts the bytes of <code>ref</code> into a {@link CharSequence} whose
 * concrete type is picked from a single <code>random.nextInt(5)</code> draw:
 * <ul>
 *   <li>4: a {@link CharsRef} filled via <code>UnicodeUtil.UTF8toUTF16</code></li>
 *   <li>3: a {@link CharBuffer} wrapping <code>ref.utf8ToString()</code></li>
 *   <li>anything else: the <code>String</code> from <code>ref.utf8ToString()</code></li>
 * </ul>
 *
 * @param ref    the bytes to convert
 * @param random decides which CharSequence implementation is returned
 * @return the converted text in one of the three representations above
 */
public static CharSequence bytesToCharSequence(BytesRef ref, Random random) {
  final int variant = random.nextInt(5);
  if (variant == 4) {
    final CharsRef decoded = new CharsRef(ref.length);
    UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, decoded);
    return decoded;
  }
  // Both remaining variants start from the same String conversion.
  final String text = ref.utf8ToString();
  if (variant == 3) {
    return CharBuffer.wrap(text);
  }
  return text;
}
}

View File

@ -16,6 +16,7 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.BytesRefIterator;
/**
@ -30,5 +31,5 @@ public interface Dictionary {
* Return all words present in the dictionary
* @return Iterator
*/
BytesRefIterator getWordsIterator();
BytesRefIterator getWordsIterator() throws IOException;
}

View File

@ -19,16 +19,13 @@ package org.apache.lucene.search.spell;
import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
/**
* HighFrequencyDictionary: terms taken from the given field
@ -44,7 +41,6 @@ public class HighFrequencyDictionary implements Dictionary {
private IndexReader reader;
private String field;
private float thresh;
private final CharsRef spare = new CharsRef();
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
this.reader = reader;
@ -52,66 +48,55 @@ public class HighFrequencyDictionary implements Dictionary {
this.thresh = thresh;
}
public final BytesRefIterator getWordsIterator() {
public final BytesRefIterator getWordsIterator() throws IOException {
return new HighFrequencyIterator();
}
final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
final class HighFrequencyIterator implements TermFreqIterator {
private final BytesRef spare = new BytesRef();
private final TermsEnum termsEnum;
private int minNumDocs;
private long freq;
HighFrequencyIterator() {
try {
Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
termsEnum = terms.iterator(null);
} else {
termsEnum = null;
}
minNumDocs = (int)(thresh * (float)reader.numDocs());
} catch (IOException e) {
throw new RuntimeException(e);
HighFrequencyIterator() throws IOException {
Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
termsEnum = terms.iterator(null);
} else {
termsEnum = null;
}
minNumDocs = (int)(thresh * (float)reader.numDocs());
}
/**
 * Tells whether the given frequency reaches the <code>minNumDocs</code>
 * threshold (inclusive).
 *
 * @param freq the frequency to test
 * @return <code>true</code> if <code>freq</code> is at least <code>minNumDocs</code>
 */
private boolean isFrequent(int freq) {
return freq >= minNumDocs;
}
public float freq() {
try {
return termsEnum.docFreq();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
public long weight() {
return freq;
}
@Override
public BytesRef next() throws IOException {
if (termsEnum != null) {
BytesRef next;
while ((next = termsEnum.next()) != null) {
while((next = termsEnum.next()) != null) {
if (isFrequent(termsEnum.docFreq())) {
freq = termsEnum.docFreq();
spare.copyBytes(next);
return spare;
}
}
}
}
return null;
}
@Override
public Comparator<BytesRef> comparator() {
try {
if (termsEnum == null) {
return null;
} else {
return termsEnum.getComparator();
}
} catch (IOException e) {
throw new RuntimeException(e);
public Comparator<BytesRef> getComparator() {
if (termsEnum == null) {
return null;
} else {
return termsEnum.getComparator();
}
}
}

View File

@ -43,17 +43,12 @@ public class LuceneDictionary implements Dictionary {
this.field = field;
}
public final BytesRefIterator getWordsIterator() {
try {
final Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
return terms.iterator(null);
} else {
return BytesRefIterator.EMPTY_ITERATOR;
}
} catch (IOException e) {
throw new RuntimeException(e);
public final BytesRefIterator getWordsIterator() throws IOException {
final Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
return terms.iterator(null);
} else {
return BytesRefIterator.EMPTY_ITERATOR;
}
}

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search.spell;
*/
import java.util.Iterator;
import java.util.Comparator;
import java.io.*;
import org.apache.lucene.util.BytesRef;
@ -53,7 +53,7 @@ public class PlainTextDictionary implements Dictionary {
in = new BufferedReader(reader);
}
public BytesRefIterator getWordsIterator() {
public BytesRefIterator getWordsIterator() throws IOException {
return new FileIterator();
}
@ -85,6 +85,11 @@ public class PlainTextDictionary implements Dictionary {
}
return result;
}
/**
 * Always returns <code>null</code>; this iterator reports no sort order for
 * the words it provides.
 */
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
}
}

View File

@ -1,33 +0,0 @@
package org.apache.lucene.search.spell;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.util.BytesRef;
/**
* Marker interface to signal that elements coming from {@link Iterator}
* come in ascending lexicographic order.
*/
public interface SortedIterator {
public Comparator<BytesRef> comparator();
}

View File

@ -18,12 +18,14 @@ package org.apache.lucene.search.spell;
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
public interface TermFreqIterator extends BytesRefIterator {
public float freq();
public long weight();
public static class TermFreqIteratorWrapper implements TermFreqIterator {
private BytesRefIterator wrapped;
@ -32,12 +34,17 @@ public interface TermFreqIterator extends BytesRefIterator {
this.wrapped = wrapped;
}
public float freq() {
return 1.0f;
public long weight() {
return 1;
}
public BytesRef next() throws IOException {
return wrapped.next();
}
/**
 * Returns whatever comparator the wrapped iterator reports.
 */
@Override
public Comparator<BytesRef> getComparator() {
return wrapped.getComparator();
}
}
}

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search.suggest;
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -27,12 +27,14 @@ import org.apache.lucene.util.BytesRef;
* This wrapper buffers incoming elements.
*/
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
// TODO keep this for now
protected BytesRefList entries = new BytesRefList();
protected int curPos = -1;
protected float[] freqs = new float[1];
protected long[] freqs = new long[1];
private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comp;
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
this.comp = source.getComparator();
BytesRef spare;
int freqIndex = 0;
while((spare = source.next()) != null) {
@ -40,12 +42,12 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
if (freqIndex >= freqs.length) {
freqs = ArrayUtil.grow(freqs, freqs.length+1);
}
freqs[freqIndex++] = source.freq();
freqs[freqIndex++] = source.weight();
}
}
public float freq() {
public long weight() {
return freqs[curPos];
}
@ -58,5 +60,10 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
return null;
}
/**
 * Returns the comparator held in <code>comp</code>.
 */
@Override
public Comparator<BytesRef> getComparator() {
return comp;
}
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
@ -83,6 +84,11 @@ final class BytesRefList {
}
return null;
}
/**
 * Always returns <code>null</code>; this iterator reports no sort order.
 */
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
};
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
import java.io.*;
import java.util.Comparator;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator;
@ -56,11 +57,11 @@ public class FileDictionary implements Dictionary {
}
final class FileIterator implements TermFreqIterator {
private float curFreq;
private long curFreq;
private final BytesRef spare = new BytesRef();
public float freq() {
public long weight() {
return curFreq;
}
@ -73,7 +74,8 @@ public class FileDictionary implements Dictionary {
if (line != null) {
String[] fields = line.split("\t");
if (fields.length > 1) {
curFreq = Float.parseFloat(fields[1]);
// keep reading floats for bw compat
curFreq = (int)Float.parseFloat(fields[1]);
spare.copyChars(fields[0]);
} else {
spare.copyChars(line);
@ -86,6 +88,11 @@ public class FileDictionary implements Dictionary {
return null;
}
}
/**
 * Always returns <code>null</code>; this iterator reports no sort order for
 * the entries it provides.
 */
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
}
}

View File

@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
@ -33,10 +34,10 @@ public abstract class Lookup {
* Result of a lookup.
*/
public static final class LookupResult implements Comparable<LookupResult> {
public final String key;
public final CharSequence key;
public final float value;
public LookupResult(String key, float value) {
public LookupResult(CharSequence key, float value) {
this.key = key;
this.value = value;
}
@ -48,10 +49,32 @@ public abstract class Lookup {
/** Compare alphabetically. */
public int compareTo(LookupResult o) {
return this.key.compareTo(o.key);
return CHARSEQUENCE_COMPARATOR.compare(key, o.key);
}
}
public static final Comparator<CharSequence> CHARSEQUENCE_COMPARATOR = new CharSequenceComparator();
/**
 * Compares two {@link CharSequence}s <code>char</code> by <code>char</code>.
 * The first position at which the sequences differ decides the result; if one
 * sequence is a prefix of the other, the shorter one sorts first.
 */
private static class CharSequenceComparator implements Comparator<CharSequence> {

  /**
   * @return the difference of the first pair of differing <code>char</code>
   *         values, or the difference of the lengths if no such pair exists
   *         within the shared prefix
   */
  @Override
  public int compare(CharSequence o1, CharSequence o2) {
    final int len1 = o1.length();
    final int len2 = o2.length();
    final int shared = len1 < len2 ? len1 : len2;

    int pos = 0;
    while (pos < shared) {
      final int delta = o1.charAt(pos) - o2.charAt(pos);
      if (delta != 0) {
        return delta;
      }
      pos++;
    }

    // Identical over the shared prefix: the length decides (0 when equal).
    return len1 - len2;
  }
}
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
public LookupPriorityQueue(int size) {
@ -99,8 +122,7 @@ public abstract class Lookup {
* @param num maximum number of results to return
* @return a list of possible completions, with their relative weight (e.g. popularity)
*/
// TODO: this should be a BytesRef API?
public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num);
/**
* Modify the lookup data by recording additional data. Optional operation.
@ -109,16 +131,14 @@ public abstract class Lookup {
* @return true if new key is added, false if it already exists or operation
* is not supported.
*/
// TODO: this should be a BytesRef API?
public abstract boolean add(String key, Object value);
public abstract boolean add(CharSequence key, Object value);
/**
* Get value associated with a specific key.
* @param key lookup key
* @return associated value
*/
// TODO: this should be a BytesRef API?
public abstract Object get(String key);
public abstract Object get(CharSequence key);
/**
* Persist the constructed lookup data to a directory. Optional operation.

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search.suggest;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
@ -28,13 +27,12 @@ import org.apache.lucene.util.BytesRef;
* This wrapper buffers incoming elements and makes sure they are sorted in
* ascending lexicographic order.
*/
public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator {
public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
// TODO keep this for now - but the consumer should really sort this stuff on disk with sorter...
private final int[] sortedOrds;
private int currentOrd = -1;
private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comp;
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
super(source);
@ -43,7 +41,7 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap
}
@Override
public float freq() {
public long weight() {
return freqs[currentOrd];
}
@ -56,9 +54,8 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap
}
@Override
public Comparator<BytesRef> comparator() {
public Comparator<BytesRef> getComparator() {
return comp;
}
}

View File

@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRef;
* random order.
*/
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
// TODO keep this for now
private final int[] ords;
private int currentOrd = -1;
private final BytesRef spare = new BytesRef();
@ -48,7 +48,7 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
}
@Override
public float freq() {
public long weight() {
return freqs[currentOrd];
}

View File

@ -199,7 +199,7 @@ public class FSTCompletion {
* @return Returns the suggestions, sorted by their approximated weight first
* (decreasing) and then alphabetically (UTF-8 codepoint order).
*/
public List<Completion> lookup(String key, int num) {
public List<Completion> lookup(CharSequence key, int num) {
if (key.length() == 0 || automaton == null) {
return EMPTY_RESULT;
}
@ -388,7 +388,7 @@ public class FSTCompletion {
* Returns the bucket assigned to a given key (if found) or <code>null</code> if
* no exact match exists.
*/
public Integer getBucket(String key) {
public Integer getBucket(CharSequence key) {
return getExactMatchStartingFromRootArc(0, new BytesRef(key));
}

View File

@ -46,7 +46,7 @@ import org.apache.lucene.util.fst.*;
* </ul>
*
* <p>
* At runtime, in {@link FSTCompletion#lookup(String, int)},
* At runtime, in {@link FSTCompletion#lookup(CharSequence, int)},
* the automaton is utilized as follows:
* <ul>
* <li>For each possible term weight encoded in the automaton (cached arcs from

View File

@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.NoOutputs;
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
*
* <p>This adapter differs from {@link FSTCompletion} in that it attempts
* to discretize any "weights" as passed from in {@link TermFreqIterator#freq()}
* to discretize any "weights" as passed from in {@link TermFreqIterator#weight()}
* to match the number of buckets. For the rationale for bucketing, see
* {@link FSTCompletion}.
*
@ -171,7 +171,7 @@ public class FSTCompletionLookup extends Lookup {
}
output.reset(buffer);
output.writeInt(FloatMagic.toSortable(tfit.freq()));
output.writeInt(FloatMagic.toSortable(tfit.weight()));
output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition());
}
@ -232,7 +232,7 @@ public class FSTCompletionLookup extends Lookup {
}
@Override
public List<LookupResult> lookup(String key, boolean higherWeightsFirst, int num) {
public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
final List<Completion> completions;
if (higherWeightsFirst) {
completions = higherWeightsCompletion.lookup(key, num);
@ -241,20 +241,23 @@ public class FSTCompletionLookup extends Lookup {
}
final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size());
CharsRef spare = new CharsRef();
for (Completion c : completions) {
results.add(new LookupResult(c.utf8.utf8ToString(), c.bucket));
spare.grow(c.utf8.length);
UnicodeUtil.UTF8toUTF16(c.utf8, spare);
results.add(new LookupResult(spare.toString(), c.bucket));
}
return results;
}
@Override
public boolean add(String key, Object value) {
public boolean add(CharSequence key, Object value) {
// Not supported.
return false;
}
@Override
public Float get(String key) {
public Object get(CharSequence key) {
Integer bucket = normalCompletion.getBucket(key);
if (bucket == null)
return null;

View File

@ -33,8 +33,10 @@ import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
@ -121,7 +123,7 @@ public class WFSTCompletionLookup extends Lookup {
output.reset(buffer);
output.writeBytes(spare.bytes, spare.offset, spare.length);
output.writeByte((byte)0); // separator: not used, just for sort order
output.writeInt((int)encodeWeight(iterator.freq()));
output.writeInt((int)encodeWeight(iterator.weight()));
writer.write(buffer, 0, output.getPosition());
}
writer.close();
@ -200,7 +202,7 @@ public class WFSTCompletionLookup extends Lookup {
}
@Override
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
assert num > 0;
BytesRef scratch = new BytesRef(key);
int prefixLength = scratch.length;
@ -217,8 +219,11 @@ public class WFSTCompletionLookup extends Lookup {
}
List<LookupResult> results = new ArrayList<LookupResult>(num);
CharsRef spare = new CharsRef();
if (exactFirst && arc.isFinal()) {
results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
spare.grow(scratch.length);
UnicodeUtil.UTF8toUTF16(scratch, spare);
results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
if (--num == 0) {
return results; // that was quick
}
@ -236,8 +241,9 @@ public class WFSTCompletionLookup extends Lookup {
// append suffix
Util.toBytesRef(completion.input, suffix);
scratch.append(suffix);
results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + completion.output)));
spare.grow(scratch.length);
UnicodeUtil.UTF8toUTF16(scratch, spare);
results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + completion.output)));
}
return results;
}
@ -264,7 +270,7 @@ public class WFSTCompletionLookup extends Lookup {
}
@Override
public boolean add(String key, Object value) {
public boolean add(CharSequence key, Object value) {
return false; // Not supported.
}
@ -273,7 +279,7 @@ public class WFSTCompletionLookup extends Lookup {
* or null if it does not exist.
*/
@Override
public Float get(String key) {
public Object get(CharSequence key) {
Arc<Long> arc = new Arc<Long>();
Long result = null;
try {

View File

@ -28,7 +28,6 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
@ -45,7 +44,7 @@ public class JaspellLookup extends Lookup {
@Override
public void build(TermFreqIterator tfit) throws IOException {
if (tfit instanceof SortedIterator) {
if (tfit.getComparator() != null) {
// make sure it's unsorted
// WTF - this could result in yet another sorted iteration....
tfit = new UnsortedTermFreqIteratorWrapper(tfit);
@ -56,7 +55,7 @@ public class JaspellLookup extends Lookup {
final CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) {
float freq = tfit.freq();
float freq = tfit.weight();
if (spare.length == 0) {
continue;
}
@ -67,19 +66,19 @@ public class JaspellLookup extends Lookup {
}
@Override
public boolean add(String key, Object value) {
public boolean add(CharSequence key, Object value) {
trie.put(key, value);
// XXX
return false;
}
@Override
public Object get(String key) {
public Object get(CharSequence key) {
return trie.get(key);
}
@Override
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
List<LookupResult> res = new ArrayList<LookupResult>();
List<String> list;
int count = onlyMorePopular ? num * 2 : num;
@ -97,7 +96,7 @@ public class JaspellLookup extends Lookup {
LookupPriorityQueue queue = new LookupPriorityQueue(num);
for (String s : list) {
float freq = (Float)trie.get(s);
queue.insertWithOverflow(new LookupResult(s, freq));
queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
}
for (LookupResult lr : queue.getResults()) {
res.add(lr);
@ -106,7 +105,7 @@ public class JaspellLookup extends Lookup {
for (int i = 0; i < maxCnt; i++) {
String s = list.get(i);
float freq = (Float)trie.get(s);
res.add(new LookupResult(s, freq));
res.add(new LookupResult(new CharsRef(s), freq));
}
}
return res;

View File

@ -368,8 +368,8 @@ public class JaspellTernarySearchTrie {
* A <code>String</code> index.
*@return The object retrieved from the Ternary Search Trie.
*/
public Object get(String key) {
TSTNode node = getNode(key.trim().toLowerCase());
public Object get(CharSequence key) {
TSTNode node = getNode(key);
if (node == null) {
return null;
}
@ -435,7 +435,7 @@ public class JaspellTernarySearchTrie {
*@return The node object indexed by key. This object is an instance of an
* inner class named <code>TernarySearchTrie.TSTNode</code>.
*/
public TSTNode getNode(String key) {
public TSTNode getNode(CharSequence key) {
return getNode(key, rootNode);
}
@ -443,15 +443,14 @@ public class JaspellTernarySearchTrie {
* Returns the node indexed by key, or <code>null</code> if that node doesn't
* exist. The search begins at root node.
*
*@param key2
*@param key
* A <code>String</code> that indexes the node that is returned.
*@param startNode
* The top node defining the subtrie to be searched.
*@return The node object indexed by key. This object is an instance of an
* inner class named <code>TernarySearchTrie.TSTNode</code>.
*/
protected TSTNode getNode(String key2, TSTNode startNode) {
String key = key2.trim().toLowerCase();
protected TSTNode getNode(CharSequence key, TSTNode startNode) {
if (key == null || startNode == null || key.length() == 0) {
return null;
}
@ -490,7 +489,7 @@ public class JaspellTernarySearchTrie {
*@exception IllegalArgumentException
* If the key is an empty <code>String</code>.
*/
protected TSTNode getOrCreateNode(String key) throws NullPointerException,
protected TSTNode getOrCreateNode(CharSequence key) throws NullPointerException,
IllegalArgumentException {
if (key == null) {
throw new NullPointerException(
@ -568,7 +567,7 @@ public class JaspellTernarySearchTrie {
* The maximum number of values returned by this method.
*@return A <code>List</code> with the results
*/
public List<String> matchAlmost(String key, int numReturnValues) {
public List<String> matchAlmost(CharSequence key, int numReturnValues) {
return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key,
((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>(), false);
}
@ -598,7 +597,7 @@ public class JaspellTernarySearchTrie {
*@return A <code>List</code> with the results.
*/
private List<String> matchAlmostRecursion(TSTNode currentNode, int charIndex,
int d, String matchAlmostKey, int matchAlmostNumReturnValues,
int d, CharSequence matchAlmostKey, int matchAlmostNumReturnValues,
List<String> matchAlmostResult2, boolean upTo) {
if ((currentNode == null)
|| (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues)
@ -658,7 +657,7 @@ public class JaspellTernarySearchTrie {
* The maximum number of values returned from this method.
*@return A <code>List</code> with the results
*/
public List<String> matchPrefix(String prefix, int numReturnValues) {
public List<String> matchPrefix(CharSequence prefix, int numReturnValues) {
Vector<String> sortKeysResult = new Vector<String>();
TSTNode startNode = getNode(prefix);
if (startNode == null) {
@ -722,8 +721,8 @@ public class JaspellTernarySearchTrie {
*@param value
* The object to be stored in the Trie.
*/
public void put(String key, Object value) {
getOrCreateNode(key.trim().toLowerCase()).data = value;
public void put(CharSequence key, Object value) {
getOrCreateNode(key).data = value;
}
/**

View File

@ -57,7 +57,7 @@ public class TSTAutocomplete {
* index of character in key to be inserted currently.
* @return currentNode The new reference to root node of TST
*/
public TernaryTreeNode insert(TernaryTreeNode currentNode, String s,
public TernaryTreeNode insert(TernaryTreeNode currentNode, CharSequence s,
Object val, int x) {
if (s == null || s.length() <= x) {
return currentNode;
@ -69,7 +69,7 @@ public class TSTAutocomplete {
if (x < s.length() - 1) {
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
} else {
currentNode.token = s;
currentNode.token = s.toString();
currentNode.val = val;
return currentNode;
}
@ -79,7 +79,7 @@ public class TSTAutocomplete {
if (x < s.length() - 1) {
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
} else {
currentNode.token = s;
currentNode.token = s.toString();
currentNode.val = val;
return currentNode;
}
@ -104,7 +104,7 @@ public class TSTAutocomplete {
* @return suggest list of auto-completed keys for the given prefix query.
*/
public ArrayList<TernaryTreeNode> prefixCompletion(TernaryTreeNode root,
String s, int x) {
CharSequence s, int x) {
TernaryTreeNode p = root;
ArrayList<TernaryTreeNode> suggest = new ArrayList<TernaryTreeNode>();

View File

@ -30,7 +30,6 @@ import java.util.List;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
@ -45,7 +44,7 @@ public class TSTLookup extends Lookup {
public void build(TermFreqIterator tfit) throws IOException {
root = new TernaryTreeNode();
// buffer first
if ((!(tfit instanceof SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
// make sure it's sorted and the comparator uses UTF16 sort order
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
}
@ -58,34 +57,47 @@ public class TSTLookup extends Lookup {
charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
tokens.add(charsSpare.toString());
vals.add(new Float(tfit.freq()));
vals.add(new Float(tfit.weight()));
}
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
}
@Override
public boolean add(String key, Object value) {
public boolean add(CharSequence key, Object value) {
autocomplete.insert(root, key, value, 0);
// XXX we don't know if a new node was created
return true;
}
@Override
public Object get(String key) {
public Object get(CharSequence key) {
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
if (list == null || list.isEmpty()) {
return null;
}
for (TernaryTreeNode n : list) {
if (n.token.equals(key)) {
if (charSeqEquals(n.token, key)) {
return n.val;
}
}
return null;
}
/**
 * Tests two character sequences for content equality.
 *
 * <p>{@link CharSequence} does not define a content-based {@code equals}, so
 * the sequences are compared by length first and then char by char.
 *
 * @param left  first sequence to compare
 * @param right second sequence to compare
 * @return {@code true} if both sequences have the same length and hold the
 *         same char at every index, {@code false} otherwise
 */
private static boolean charSeqEquals(CharSequence left, CharSequence right) {
  final int length = left.length();
  if (length == right.length()) {
    // Advance while the chars agree; reaching the end means no mismatch was found.
    int pos = 0;
    while (pos < length && left.charAt(pos) == right.charAt(pos)) {
      pos++;
    }
    return pos == length;
  }
  return false;
}
@Override
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
List<LookupResult> res = new ArrayList<LookupResult>();
if (list == null || list.size() == 0) {

View File

@ -97,7 +97,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
while ((line = br.readLine()) != null) {
int tab = line.indexOf('|');
assertTrue("No | separator?: " + line, tab >= 0);
float weight = Float.parseFloat(line.substring(tab + 1));
int weight = Integer.parseInt(line.substring(tab + 1));
String key = line.substring(0, tab);
input.add(new TermFreq(key, weight));
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class PersistenceTest extends LuceneTestCase {
public final String[] keys = new String[] {
@ -61,7 +62,7 @@ public class PersistenceTest extends LuceneTestCase {
Lookup lookup = lookupClass.newInstance();
TermFreq[] keys = new TermFreq[this.keys.length];
for (int i = 0; i < keys.length; i++)
keys[i] = new TermFreq(this.keys[i], (float) i);
keys[i] = new TermFreq(this.keys[i], i);
lookup.build(new TermFreqArrayIterator(keys));
// Store the suggester.
@ -75,7 +76,7 @@ public class PersistenceTest extends LuceneTestCase {
// Assert validity.
float previous = Float.NEGATIVE_INFINITY;
for (TermFreq k : keys) {
Float val = (Float) lookup.get(k.term.utf8ToString());
Float val = (Float) lookup.get(_TestUtil.bytesToCharSequence(k.term, random));
assertNotNull(k.term.utf8ToString(), val);
if (supportsExactWeights) {

View File

@ -21,13 +21,13 @@ import org.apache.lucene.util.BytesRef;
public final class TermFreq {
public final BytesRef term;
public final float v;
public final long v;
public TermFreq(String term, float v) {
public TermFreq(String term, long v) {
this(new BytesRef(term), v);
}
public TermFreq(BytesRef term, float v) {
public TermFreq(BytesRef term, long v) {
this.term = term;
this.v = v;
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.search.spell.TermFreqIterator;
@ -44,7 +45,7 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
this(i.iterator());
}
public float freq() {
public long weight() {
return current.v;
}
@ -57,4 +58,9 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
}
return null;
}
/**
 * Always returns {@code null}: this iterator does not report a comparator
 * for the terms it yields.
 *
 * @return {@code null}
 */
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
}

View File

@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.search.suggest.BytesRefList;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase;

View File

@ -23,7 +23,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase;
@ -37,8 +36,7 @@ public class TestHighFrequencyDictionary extends LuceneTestCase {
IndexReader ir = DirectoryReader.open(dir);
Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
BytesRefIterator tf = dictionary.getWordsIterator();
assertTrue(tf instanceof SortedIterator);
((SortedIterator)tf).comparator();
assertNull(tf.getComparator());
assertNull(tf.next());
dir.close();
}

View File

@ -38,7 +38,7 @@ public class TestTermFreqIterator extends LuceneTestCase {
public void testTerms() throws Exception {
int num = atLeast(10000);
TreeMap<BytesRef,Float> sorted = new TreeMap<BytesRef,Float>();
TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>();
TermFreq[] unsorted = new TermFreq[num];
for (int i = 0; i < num; i++) {
@ -46,28 +46,28 @@ public class TestTermFreqIterator extends LuceneTestCase {
do {
key = new BytesRef(_TestUtil.randomUnicodeString(random));
} while (sorted.containsKey(key));
float value = random.nextFloat();
long value = random.nextLong();
sorted.put(key, value);
unsorted[i] = new TermFreq(key, value);
}
// test the sorted iterator wrapper
TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator());
Iterator<Map.Entry<BytesRef,Float>> expected = sorted.entrySet().iterator();
Iterator<Map.Entry<BytesRef,Long>> expected = sorted.entrySet().iterator();
while (expected.hasNext()) {
Map.Entry<BytesRef,Float> entry = expected.next();
Map.Entry<BytesRef,Long> entry = expected.next();
assertEquals(entry.getKey(), wrapper.next());
assertEquals(entry.getValue().floatValue(), wrapper.freq(), 0F);
assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F);
}
assertNull(wrapper.next());
// test the unsorted iterator wrapper
wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted));
TreeMap<BytesRef,Float> actual = new TreeMap<BytesRef,Float>();
TreeMap<BytesRef,Long> actual = new TreeMap<BytesRef,Long>();
BytesRef key;
while ((key = wrapper.next()) != null) {
float value = wrapper.freq();
long value = wrapper.weight();
actual.put(BytesRef.deepCopyOf(key), value);
}
assertEquals(sorted, actual);

View File

@ -28,7 +28,7 @@ import org.apache.lucene.util.*;
* Unit tests for {@link FSTCompletion}.
*/
public class FSTCompletionTest extends LuceneTestCase {
public static TermFreq tf(String t, float v) {
public static TermFreq tf(String t, int v) {
return new TermFreq(t, v);
}
@ -62,28 +62,28 @@ public class FSTCompletionTest extends LuceneTestCase {
tf("foundation", 1),
tf("fourblah", 1),
tf("fourteen", 1),
tf("four", 0f),
tf("fourier", 0f),
tf("fourty", 0f),
tf("four", 0),
tf("fourier", 0),
tf("fourty", 0),
tf("xo", 1),
};
return keys;
}
public void testExactMatchHighPriority() throws Exception {
assertMatchEquals(completion.lookup("two", 1),
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("two", random), 1),
"two/1.0");
}
public void testExactMatchLowPriority() throws Exception {
assertMatchEquals(completion.lookup("one", 2),
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0",
"oneness/1.0");
}
public void testExactMatchReordering() throws Exception {
// Check reordering of exact matches.
assertMatchEquals(completion.lookup("four", 4),
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4),
"four/0.0",
"fourblah/1.0",
"fourteen/1.0",
@ -92,49 +92,49 @@ public class FSTCompletionTest extends LuceneTestCase {
public void testRequestedCount() throws Exception {
// 'one' is promoted after collecting two higher ranking results.
assertMatchEquals(completion.lookup("one", 2),
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0",
"oneness/1.0");
// 'four' is collected in a bucket and then again as an exact match.
assertMatchEquals(completion.lookup("four", 2),
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 2),
"four/0.0",
"fourblah/1.0");
// Check reordering of exact matches.
assertMatchEquals(completion.lookup("four", 4),
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4),
"four/0.0",
"fourblah/1.0",
"fourteen/1.0",
"fourier/0.0");
// 'one' is at the top after collecting all alphabetical results.
assertMatchEquals(completionAlphabetical.lookup("one", 2),
assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0",
"oneness/1.0");
// 'one' is not promoted after collecting two higher ranking results.
FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false);
assertMatchEquals(noPromotion.lookup("one", 2),
assertMatchEquals(noPromotion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"oneness/1.0",
"onerous/1.0");
// 'one' is at the top after collecting all alphabetical results.
assertMatchEquals(completionAlphabetical.lookup("one", 2),
assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0",
"oneness/1.0");
}
public void testMiss() throws Exception {
assertMatchEquals(completion.lookup("xyz", 1));
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("xyz", random), 1));
}
public void testAlphabeticWithWeights() throws Exception {
assertEquals(0, completionAlphabetical.lookup("xyz", 1).size());
assertEquals(0, completionAlphabetical.lookup(_TestUtil.stringToCharSequence("xyz", random), 1).size());
}
public void testFullMatchList() throws Exception {
assertMatchEquals(completion.lookup("one", Integer.MAX_VALUE),
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), Integer.MAX_VALUE),
"oneness/1.0",
"onerous/1.0",
"onesimus/1.0",
@ -148,7 +148,7 @@ public class FSTCompletionTest extends LuceneTestCase {
builder.add(new BytesRef(key), 0);
FSTCompletion lookup = builder.build();
List<Completion> result = lookup.lookup(key, 1);
List<Completion> result = lookup.lookup(_TestUtil.stringToCharSequence(key, random), 1);
assertEquals(1, result.size());
}
@ -158,7 +158,7 @@ public class FSTCompletionTest extends LuceneTestCase {
Random r = random;
List<TermFreq> keys = new ArrayList<TermFreq>();
for (int i = 0; i < 5000; i++) {
keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1.0f));
keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
}
lookup.build(new TermFreqArrayIterator(keys));
@ -167,7 +167,7 @@ public class FSTCompletionTest extends LuceneTestCase {
// are.
Float previous = null;
for (TermFreq tf : keys) {
Float current = lookup.get(tf.term.utf8ToString());
Float current = (Float)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random));
if (previous != null) {
assertEquals(previous, current);
}
@ -180,28 +180,27 @@ public class FSTCompletionTest extends LuceneTestCase {
FSTCompletionLookup lookup = new FSTCompletionLookup();
lookup.build(new TermFreqArrayIterator(input));
for (TermFreq tf : input) {
assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null);
assertEquals(tf.term.utf8ToString(), lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key);
assertTrue("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)) != null);
assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random), true, 1).get(0).key.toString());
}
List<LookupResult> result = lookup.lookup("wit", true, 5);
List<LookupResult> result = lookup.lookup(_TestUtil.stringToCharSequence("wit", random), true, 5);
assertEquals(5, result.size());
assertTrue(result.get(0).key.equals("wit")); // exact match.
assertTrue(result.get(1).key.equals("with")); // highest count.
assertTrue(result.get(0).key.toString().equals("wit")); // exact match.
assertTrue(result.get(1).key.toString().equals("with")); // highest count.
}
public void testEmptyInput() throws Exception {
completion = new FSTCompletionBuilder().build();
assertMatchEquals(completion.lookup("", 10));
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("", random), 10));
}
public void testRandom() throws Exception {
List<TermFreq> freqs = new ArrayList<TermFreq>();
Random rnd = random;
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
float weight = rnd.nextFloat() * 100;
int weight = random.nextInt(100);
freqs.add(new TermFreq("" + rnd.nextLong(), weight));
}
@ -212,8 +211,8 @@ public class FSTCompletionTest extends LuceneTestCase {
final String term = tf.term.utf8ToString();
for (int i = 1; i < term.length(); i++) {
String prefix = term.substring(0, i);
for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
assertTrue(lr.key.startsWith(prefix));
for (LookupResult lr : lookup.lookup(_TestUtil.stringToCharSequence(prefix, random), true, 10)) {
assertTrue(lr.key.toString().startsWith(prefix));
}
}
}

View File

@ -45,33 +45,33 @@ public class WFSTCompletionTest extends LuceneTestCase {
suggester.build(new TermFreqArrayIterator(keys));
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup("f", false, 2);
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results.get(0).key);
assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
// top N of 1 for 'bar': we return this even though barbar is higher
results = suggester.lookup("bar", false, 1);
results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random), false, 1);
assertEquals(1, results.size());
assertEquals("bar", results.get(0).key);
assertEquals("bar", results.get(0).key.toString());
assertEquals(10, results.get(0).value, 0.01F);
// top N of 2 for 'b'
results = suggester.lookup("b", false, 2);
results = suggester.lookup(_TestUtil.stringToCharSequence("b", random), false, 2);
assertEquals(2, results.size());
assertEquals("barbar", results.get(0).key);
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals("bar", results.get(1).key);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
// top N of 3 for 'ba'
results = suggester.lookup("ba", false, 3);
results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random), false, 3);
assertEquals(3, results.size());
assertEquals("barbar", results.get(0).key);
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals("bar", results.get(1).key);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
assertEquals("barbara", results.get(2).key);
assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
}
@ -100,7 +100,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random.nextInt(1<<24);
slowCompletor.put(s, (long)weight);
keys[i] = new TermFreq(s, (float) weight);
keys[i] = new TermFreq(s, weight);
}
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
@ -109,7 +109,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
for (String prefix : allPrefixes) {
final int topN = _TestUtil.nextInt(random, 1, 10);
List<LookupResult> r = suggester.lookup(prefix, false, topN);
List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random), false, topN);
// 2. go through the whole treemap (slowCompletor) and check it's actually the best suggestion
final List<LookupResult> matches = new ArrayList<LookupResult>();
@ -126,7 +126,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
public int compare(LookupResult left, LookupResult right) {
int cmp = Float.compare(right.value, left.value);
if (cmp == 0) {
return left.key.compareTo(right.key);
return left.compareTo(right);
} else {
return cmp;
}
@ -140,7 +140,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
for(int hit=0;hit<r.size();hit++) {
//System.out.println(" check hit " + hit);
assertEquals(matches.get(hit).key, r.get(hit).key);
assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
}
}

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.search.suggest.FileDictionary;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.util.CharsRef;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
@ -152,7 +153,7 @@ public class Suggester extends SolrSpellChecker {
build(core, searcher);
}
public void add(String query, int numHits) {
public void add(CharsRef query, int numHits) {
LOG.info("add " + query + ", " + numHits);
lookup.add(query, new Integer(numHits));
}
@ -167,9 +168,12 @@ public class Suggester extends SolrSpellChecker {
return EMPTY_RESULT;
}
SpellingResult res = new SpellingResult();
CharsRef scratch = new CharsRef();
for (Token t : options.tokens) {
String term = new String(t.buffer(), 0, t.length());
List<LookupResult> suggestions = lookup.lookup(term,
scratch.chars = t.buffer();
scratch.offset = 0;
scratch.length = t.length();
List<LookupResult> suggestions = lookup.lookup(scratch,
options.onlyMorePopular, options.count);
if (suggestions == null) {
continue;
@ -178,7 +182,7 @@ public class Suggester extends SolrSpellChecker {
Collections.sort(suggestions);
}
for (LookupResult lr : suggestions) {
res.add(t, lr.key, ((Number)lr.value).intValue());
res.add(t, lr.key.toString(), ((Number)lr.value).intValue());
}
}
return res;