LUCENE-3807: Clean up Suggest API

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293148 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2012-02-24 09:49:39 +00:00
parent 61387fe283
commit f29eda768d
40 changed files with 289 additions and 243 deletions

View File

@ -387,7 +387,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
} }
@Override @Override
public Comparator<BytesRef> getComparator() throws IOException { public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator(); return BytesRef.getUTF8SortedAsUnicodeComparator();
} }
} }

View File

@ -654,7 +654,7 @@ public class DocTermOrds {
} }
@Override @Override
public Comparator<BytesRef> getComparator() throws IOException { public Comparator<BytesRef> getComparator() {
return termsEnum.getComparator(); return termsEnum.getComparator();
} }

View File

@ -174,7 +174,7 @@ public class FilterAtomicReader extends AtomicReader {
} }
@Override @Override
public Comparator<BytesRef> getComparator() throws IOException { public Comparator<BytesRef> getComparator() {
return in.getComparator(); return in.getComparator();
} }

View File

@ -122,7 +122,7 @@ public abstract class FilteredTermsEnum extends TermsEnum {
} }
@Override @Override
public Comparator<BytesRef> getComparator() throws IOException { public Comparator<BytesRef> getComparator() {
return tenum.getComparator(); return tenum.getComparator();
} }

View File

@ -179,13 +179,6 @@ public abstract class TermsEnum implements BytesRefIterator {
} }
}; };
} }
/** Return the {@link BytesRef} Comparator used to sort
* terms provided by the iterator. This may return
* null if there are no terms. Callers may invoke this
* method many times, so it's best to cache a single
* instance & reuse it. */
public abstract Comparator<BytesRef> getComparator() throws IOException;
/** An empty TermsEnum for quickly returning an empty instance e.g. /** An empty TermsEnum for quickly returning an empty instance e.g.
* in {@link org.apache.lucene.search.MultiTermQuery} * in {@link org.apache.lucene.search.MultiTermQuery}

View File

@ -1052,7 +1052,7 @@ class FieldCacheImpl implements FieldCache {
} }
@Override @Override
public Comparator<BytesRef> getComparator() throws IOException { public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator(); return BytesRef.getUTF8SortedAsUnicodeComparator();
} }

View File

@ -287,7 +287,7 @@ public final class FuzzyTermsEnum extends TermsEnum {
} }
@Override @Override
public Comparator<BytesRef> getComparator() throws IOException { public Comparator<BytesRef> getComparator() {
return actualEnum.getComparator(); return actualEnum.getComparator();
} }

View File

@ -233,13 +233,7 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
final byte[] bBytes = b.bytes; final byte[] bBytes = b.bytes;
int bUpto = b.offset; int bUpto = b.offset;
final int aStop; final int aStop = aUpto + Math.min(a.length, b.length);
if (a.length < b.length) {
aStop = aUpto + a.length;
} else {
aStop = aUpto + b.length;
}
while(aUpto < aStop) { while(aUpto < aStop) {
int aByte = aBytes[aUpto++] & 0xff; int aByte = aBytes[aUpto++] & 0xff;
int bByte = bBytes[bUpto++] & 0xff; int bByte = bBytes[bUpto++] & 0xff;

View File

@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Comparator;
/** /**
* A simple iterator interface for {@link BytesRef} iteration * A simple iterator interface for {@link BytesRef} iteration
@ -40,6 +41,14 @@ public interface BytesRefIterator {
*/ */
public BytesRef next() throws IOException; public BytesRef next() throws IOException;
/**
* Return the {@link BytesRef} Comparator used to sort terms provided by the
* iterator. This may return null if there are no items or the iterator is not
* sorted. Callers may invoke this method many times, so it's best to cache a
* single instance & reuse it.
*/
public Comparator<BytesRef> getComparator();
public final static class EmptyBytesRefIterator implements BytesRefIterator { public final static class EmptyBytesRefIterator implements BytesRefIterator {
@Override @Override
@ -47,6 +56,10 @@ public interface BytesRefIterator {
return null; return null;
} }
/** The empty iterator never yields an item, so it reports no comparator ({@code null}). */
public Comparator<BytesRef> getComparator() {
return null;
}
} }
} }

View File

@ -26,6 +26,7 @@ import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.PrintStream; import java.io.PrintStream;
import java.lang.reflect.Method; import java.lang.reflect.Method;
import java.nio.CharBuffer;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
@ -707,4 +708,23 @@ public class _TestUtil {
} }
return termsEnum.docs(liveDocs, null, needsFreqs); return termsEnum.docs(liveDocs, null, needsFreqs);
} }
/**
 * Wraps {@code string} in a {@link BytesRef} and passes it, together with
 * {@code random}, to {@link #bytesToCharSequence(BytesRef, Random)}.
 *
 * @param string text to convert
 * @param random source of randomness forwarded to the byte-based variant
 * @return whatever {@code bytesToCharSequence} returns for the wrapped string
 */
public static CharSequence stringToCharSequence(String string, Random random) {
  final BytesRef encoded = new BytesRef(string);
  return bytesToCharSequence(encoded, random);
}
/**
 * Converts the bytes of {@code ref} to a {@link CharSequence} whose concrete
 * type is picked by a single {@code random.nextInt(5)} draw:
 * <ul>
 *   <li>4: a {@link CharsRef} filled through {@code UnicodeUtil.UTF8toUTF16}</li>
 *   <li>3: a {@link CharBuffer} wrapping {@code ref.utf8ToString()}</li>
 *   <li>anything else: the {@code String} from {@code ref.utf8ToString()}</li>
 * </ul>
 *
 * @param ref bytes to convert
 * @param random source of randomness used to choose the implementation
 * @return the converted text in one of the three forms above
 */
public static CharSequence bytesToCharSequence(BytesRef ref, Random random) {
  final int choice = random.nextInt(5);
  if (choice == 4) {
    final CharsRef utf16 = new CharsRef(ref.length);
    UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, utf16);
    return utf16;
  }
  final String decoded = ref.utf8ToString();
  if (choice == 3) {
    return CharBuffer.wrap(decoded);
  }
  return decoded;
}
} }

View File

@ -16,6 +16,7 @@ package org.apache.lucene.search.spell;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.BytesRefIterator;
/** /**
@ -30,5 +31,5 @@ public interface Dictionary {
* Return all words present in the dictionary * Return all words present in the dictionary
* @return Iterator * @return Iterator
*/ */
BytesRefIterator getWordsIterator(); BytesRefIterator getWordsIterator() throws IOException;
} }

View File

@ -19,16 +19,13 @@ package org.apache.lucene.search.spell;
import java.io.IOException; import java.io.IOException;
import java.util.Comparator; import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
/** /**
* HighFrequencyDictionary: terms taken from the given field * HighFrequencyDictionary: terms taken from the given field
@ -44,7 +41,6 @@ public class HighFrequencyDictionary implements Dictionary {
private IndexReader reader; private IndexReader reader;
private String field; private String field;
private float thresh; private float thresh;
private final CharsRef spare = new CharsRef();
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) { public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
this.reader = reader; this.reader = reader;
@ -52,66 +48,55 @@ public class HighFrequencyDictionary implements Dictionary {
this.thresh = thresh; this.thresh = thresh;
} }
public final BytesRefIterator getWordsIterator() { public final BytesRefIterator getWordsIterator() throws IOException {
return new HighFrequencyIterator(); return new HighFrequencyIterator();
} }
final class HighFrequencyIterator implements TermFreqIterator, SortedIterator { final class HighFrequencyIterator implements TermFreqIterator {
private final BytesRef spare = new BytesRef(); private final BytesRef spare = new BytesRef();
private final TermsEnum termsEnum; private final TermsEnum termsEnum;
private int minNumDocs; private int minNumDocs;
private long freq;
HighFrequencyIterator() { HighFrequencyIterator() throws IOException {
try { Terms terms = MultiFields.getTerms(reader, field);
Terms terms = MultiFields.getTerms(reader, field); if (terms != null) {
if (terms != null) { termsEnum = terms.iterator(null);
termsEnum = terms.iterator(null); } else {
} else { termsEnum = null;
termsEnum = null;
}
minNumDocs = (int)(thresh * (float)reader.numDocs());
} catch (IOException e) {
throw new RuntimeException(e);
} }
minNumDocs = (int)(thresh * (float)reader.numDocs());
} }
private boolean isFrequent(int freq) { private boolean isFrequent(int freq) {
return freq >= minNumDocs; return freq >= minNumDocs;
} }
public float freq() { public long weight() {
try { return freq;
return termsEnum.docFreq();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
} }
@Override @Override
public BytesRef next() throws IOException { public BytesRef next() throws IOException {
if (termsEnum != null) { if (termsEnum != null) {
BytesRef next; BytesRef next;
while ((next = termsEnum.next()) != null) { while((next = termsEnum.next()) != null) {
if (isFrequent(termsEnum.docFreq())) { if (isFrequent(termsEnum.docFreq())) {
freq = termsEnum.docFreq();
spare.copyBytes(next); spare.copyBytes(next);
return spare; return spare;
} }
} }
} }
return null; return null;
} }
@Override @Override
public Comparator<BytesRef> comparator() { public Comparator<BytesRef> getComparator() {
try { if (termsEnum == null) {
if (termsEnum == null) { return null;
return null; } else {
} else { return termsEnum.getComparator();
return termsEnum.getComparator();
}
} catch (IOException e) {
throw new RuntimeException(e);
} }
} }
} }

View File

@ -43,17 +43,12 @@ public class LuceneDictionary implements Dictionary {
this.field = field; this.field = field;
} }
public final BytesRefIterator getWordsIterator() { public final BytesRefIterator getWordsIterator() throws IOException {
final Terms terms = MultiFields.getTerms(reader, field);
try { if (terms != null) {
final Terms terms = MultiFields.getTerms(reader, field); return terms.iterator(null);
if (terms != null) { } else {
return terms.iterator(null); return BytesRefIterator.EMPTY_ITERATOR;
} else {
return BytesRefIterator.EMPTY_ITERATOR;
}
} catch (IOException e) {
throw new RuntimeException(e);
} }
} }

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search.spell;
*/ */
import java.util.Iterator; import java.util.Comparator;
import java.io.*; import java.io.*;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -53,7 +53,7 @@ public class PlainTextDictionary implements Dictionary {
in = new BufferedReader(reader); in = new BufferedReader(reader);
} }
public BytesRefIterator getWordsIterator() { public BytesRefIterator getWordsIterator() throws IOException {
return new FileIterator(); return new FileIterator();
} }
@ -85,6 +85,11 @@ public class PlainTextDictionary implements Dictionary {
} }
return result; return result;
} }
/**
 * Reports no comparator ({@code null}) for this iterator.
 * NOTE(review): presumably because words come back in file order, which need not be sorted; confirm.
 */
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
} }
} }

View File

@ -1,33 +0,0 @@
package org.apache.lucene.search.spell;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.util.BytesRef;
/**
 * Marker interface to signal that elements coming from {@link Iterator}
 * come in ascending lexicographic order.
 */
public interface SortedIterator {
/**
 * Returns a comparator over {@link BytesRef} values.
 * NOTE(review): presumably the comparator that defines the iteration order; confirm with implementers.
 */
public Comparator<BytesRef> comparator();
}

View File

@ -18,12 +18,14 @@ package org.apache.lucene.search.spell;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.BytesRefIterator;
public interface TermFreqIterator extends BytesRefIterator { public interface TermFreqIterator extends BytesRefIterator {
public float freq(); public long weight();
public static class TermFreqIteratorWrapper implements TermFreqIterator { public static class TermFreqIteratorWrapper implements TermFreqIterator {
private BytesRefIterator wrapped; private BytesRefIterator wrapped;
@ -32,12 +34,17 @@ public interface TermFreqIterator extends BytesRefIterator {
this.wrapped = wrapped; this.wrapped = wrapped;
} }
public float freq() { public long weight() {
return 1.0f; return 1;
} }
public BytesRef next() throws IOException { public BytesRef next() throws IOException {
return wrapped.next(); return wrapped.next();
} }
/** Delegates to the wrapped iterator and returns whatever comparator it reports. */
@Override
public Comparator<BytesRef> getComparator() {
return wrapped.getComparator();
}
} }
} }

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search.suggest;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -27,12 +27,14 @@ import org.apache.lucene.util.BytesRef;
* This wrapper buffers incoming elements. * This wrapper buffers incoming elements.
*/ */
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
// TODO keep this for now
protected BytesRefList entries = new BytesRefList(); protected BytesRefList entries = new BytesRefList();
protected int curPos = -1; protected int curPos = -1;
protected float[] freqs = new float[1]; protected long[] freqs = new long[1];
private final BytesRef spare = new BytesRef(); private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comp;
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
this.comp = source.getComparator();
BytesRef spare; BytesRef spare;
int freqIndex = 0; int freqIndex = 0;
while((spare = source.next()) != null) { while((spare = source.next()) != null) {
@ -40,12 +42,12 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
if (freqIndex >= freqs.length) { if (freqIndex >= freqs.length) {
freqs = ArrayUtil.grow(freqs, freqs.length+1); freqs = ArrayUtil.grow(freqs, freqs.length+1);
} }
freqs[freqIndex++] = source.freq(); freqs[freqIndex++] = source.weight();
} }
} }
public float freq() { public long weight() {
return freqs[curPos]; return freqs[curPos];
} }
@ -58,5 +60,10 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
return null; return null;
} }
/** Returns the value held in the {@code comp} field. */
@Override
public Comparator<BytesRef> getComparator() {
return comp;
}
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
import java.io.IOException; import java.io.IOException;
import java.util.Comparator; import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -83,6 +84,11 @@ final class BytesRefList {
} }
return null; return null;
} }
/** Reports no comparator ({@code null}) for this iterator. */
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
}; };
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
import java.io.*; import java.io.*;
import java.util.Comparator;
import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqIterator;
@ -56,11 +57,11 @@ public class FileDictionary implements Dictionary {
} }
final class FileIterator implements TermFreqIterator { final class FileIterator implements TermFreqIterator {
private float curFreq; private long curFreq;
private final BytesRef spare = new BytesRef(); private final BytesRef spare = new BytesRef();
public float freq() { public long weight() {
return curFreq; return curFreq;
} }
@ -73,7 +74,8 @@ public class FileDictionary implements Dictionary {
if (line != null) { if (line != null) {
String[] fields = line.split("\t"); String[] fields = line.split("\t");
if (fields.length > 1) { if (fields.length > 1) {
curFreq = Float.parseFloat(fields[1]); // keep reading floats for bw compat
curFreq = (int)Float.parseFloat(fields[1]);
spare.copyChars(fields[0]); spare.copyChars(fields[0]);
} else { } else {
spare.copyChars(line); spare.copyChars(line);
@ -86,6 +88,11 @@ public class FileDictionary implements Dictionary {
return null; return null;
} }
} }
/**
 * Reports no comparator ({@code null}) for this iterator.
 * NOTE(review): presumably because entries come back in file order, which need not be sorted; confirm.
 */
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
} }
} }

View File

@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.Comparator;
import java.util.List; import java.util.List;
import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.Dictionary;
@ -33,10 +34,10 @@ public abstract class Lookup {
* Result of a lookup. * Result of a lookup.
*/ */
public static final class LookupResult implements Comparable<LookupResult> { public static final class LookupResult implements Comparable<LookupResult> {
public final String key; public final CharSequence key;
public final float value; public final float value;
public LookupResult(String key, float value) { public LookupResult(CharSequence key, float value) {
this.key = key; this.key = key;
this.value = value; this.value = value;
} }
@ -48,10 +49,32 @@ public abstract class Lookup {
/** Compare alphabetically. */ /** Compare alphabetically. */
public int compareTo(LookupResult o) { public int compareTo(LookupResult o) {
return this.key.compareTo(o.key); return CHARSEQUENCE_COMPARATOR.compare(key, o.key);
} }
} }
/** Shared comparator instance for {@link CharSequence} keys, backed by {@code CharSequenceComparator}. */
public static final Comparator<CharSequence> CHARSEQUENCE_COMPARATOR = new CharSequenceComparator();
/**
 * Orders {@link CharSequence}s by comparing their chars position by position.
 * At the first differing position the result is the difference of the two
 * chars; if one sequence is a prefix of the other (or they are equal) the
 * result is the difference of their lengths.
 */
private static class CharSequenceComparator implements Comparator<CharSequence> {

  @Override
  public int compare(CharSequence o1, CharSequence o2) {
    final int len1 = o1.length();
    final int len2 = o2.length();
    final int shared = len1 < len2 ? len1 : len2;
    int pos = 0;
    while (pos < shared) {
      final int delta = o1.charAt(pos) - o2.charAt(pos);
      if (delta != 0) {
        return delta;
      }
      pos++;
    }
    // No difference within the shared prefix: the shorter sequence sorts first.
    return len1 - len2;
  }
}
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> { public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
public LookupPriorityQueue(int size) { public LookupPriorityQueue(int size) {
@ -99,8 +122,7 @@ public abstract class Lookup {
* @param num maximum number of results to return * @param num maximum number of results to return
* @return a list of possible completions, with their relative weight (e.g. popularity) * @return a list of possible completions, with their relative weight (e.g. popularity)
*/ */
// TODO: this should be a BytesRef API? public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num);
public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
/** /**
* Modify the lookup data by recording additional data. Optional operation. * Modify the lookup data by recording additional data. Optional operation.
@ -109,16 +131,14 @@ public abstract class Lookup {
* @return true if new key is added, false if it already exists or operation * @return true if new key is added, false if it already exists or operation
* is not supported. * is not supported.
*/ */
// TODO: this should be a BytesRef API? public abstract boolean add(CharSequence key, Object value);
public abstract boolean add(String key, Object value);
/** /**
* Get value associated with a specific key. * Get value associated with a specific key.
* @param key lookup key * @param key lookup key
* @return associated value * @return associated value
*/ */
// TODO: this should be a BytesRef API? public abstract Object get(CharSequence key);
public abstract Object get(String key);
/** /**
* Persist the constructed lookup data to a directory. Optional operation. * Persist the constructed lookup data to a directory. Optional operation.

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search.suggest;
import java.io.IOException; import java.io.IOException;
import java.util.Comparator; import java.util.Comparator;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -28,13 +27,12 @@ import org.apache.lucene.util.BytesRef;
* This wrapper buffers incoming elements and makes sure they are sorted in * This wrapper buffers incoming elements and makes sure they are sorted in
* ascending lexicographic order. * ascending lexicographic order.
*/ */
public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator { public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
// TODO keep this for now - but the consumer should really sort this stuff on disk with sorter...
private final int[] sortedOrds; private final int[] sortedOrds;
private int currentOrd = -1; private int currentOrd = -1;
private final BytesRef spare = new BytesRef(); private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comp; private final Comparator<BytesRef> comp;
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException { public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
super(source); super(source);
@ -43,7 +41,7 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap
} }
@Override @Override
public float freq() { public long weight() {
return freqs[currentOrd]; return freqs[currentOrd];
} }
@ -56,9 +54,8 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap
} }
@Override @Override
public Comparator<BytesRef> comparator() { public Comparator<BytesRef> getComparator() {
return comp; return comp;
} }
} }

View File

@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRef;
* random order. * random order.
*/ */
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper { public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
// TODO keep this for now
private final int[] ords; private final int[] ords;
private int currentOrd = -1; private int currentOrd = -1;
private final BytesRef spare = new BytesRef(); private final BytesRef spare = new BytesRef();
@ -48,7 +48,7 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
} }
@Override @Override
public float freq() { public long weight() {
return freqs[currentOrd]; return freqs[currentOrd];
} }

View File

@ -199,7 +199,7 @@ public class FSTCompletion {
* @return Returns the suggestions, sorted by their approximated weight first * @return Returns the suggestions, sorted by their approximated weight first
* (decreasing) and then alphabetically (UTF-8 codepoint order). * (decreasing) and then alphabetically (UTF-8 codepoint order).
*/ */
public List<Completion> lookup(String key, int num) { public List<Completion> lookup(CharSequence key, int num) {
if (key.length() == 0 || automaton == null) { if (key.length() == 0 || automaton == null) {
return EMPTY_RESULT; return EMPTY_RESULT;
} }
@ -388,7 +388,7 @@ public class FSTCompletion {
* Returns the bucket assigned to a given key (if found) or <code>null</code> if * Returns the bucket assigned to a given key (if found) or <code>null</code> if
* no exact match exists. * no exact match exists.
*/ */
public Integer getBucket(String key) { public Integer getBucket(CharSequence key) {
return getExactMatchStartingFromRootArc(0, new BytesRef(key)); return getExactMatchStartingFromRootArc(0, new BytesRef(key));
} }

View File

@ -46,7 +46,7 @@ import org.apache.lucene.util.fst.*;
* </ul> * </ul>
* *
* <p> * <p>
* At runtime, in {@link FSTCompletion#lookup(String, int)}, * At runtime, in {@link FSTCompletion#lookup(CharSequence, int)},
* the automaton is utilized as follows: * the automaton is utilized as follows:
* <ul> * <ul>
* <li>For each possible term weight encoded in the automaton (cached arcs from * <li>For each possible term weight encoded in the automaton (cached arcs from

View File

@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.NoOutputs;
* An adapter from {@link Lookup} API to {@link FSTCompletion}. * An adapter from {@link Lookup} API to {@link FSTCompletion}.
* *
* <p>This adapter differs from {@link FSTCompletion} in that it attempts * <p>This adapter differs from {@link FSTCompletion} in that it attempts
* to discretize any "weights" as passed from in {@link TermFreqIterator#freq()} * to discretize any "weights" as passed from in {@link TermFreqIterator#weight()}
* to match the number of buckets. For the rationale for bucketing, see * to match the number of buckets. For the rationale for bucketing, see
* {@link FSTCompletion}. * {@link FSTCompletion}.
* *
@ -171,7 +171,7 @@ public class FSTCompletionLookup extends Lookup {
} }
output.reset(buffer); output.reset(buffer);
output.writeInt(FloatMagic.toSortable(tfit.freq())); output.writeInt(FloatMagic.toSortable(tfit.weight()));
output.writeBytes(spare.bytes, spare.offset, spare.length); output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition()); writer.write(buffer, 0, output.getPosition());
} }
@ -232,7 +232,7 @@ public class FSTCompletionLookup extends Lookup {
} }
@Override @Override
public List<LookupResult> lookup(String key, boolean higherWeightsFirst, int num) { public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
final List<Completion> completions; final List<Completion> completions;
if (higherWeightsFirst) { if (higherWeightsFirst) {
completions = higherWeightsCompletion.lookup(key, num); completions = higherWeightsCompletion.lookup(key, num);
@ -241,20 +241,23 @@ public class FSTCompletionLookup extends Lookup {
} }
final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size()); final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size());
CharsRef spare = new CharsRef();
for (Completion c : completions) { for (Completion c : completions) {
results.add(new LookupResult(c.utf8.utf8ToString(), c.bucket)); spare.grow(c.utf8.length);
UnicodeUtil.UTF8toUTF16(c.utf8, spare);
results.add(new LookupResult(spare.toString(), c.bucket));
} }
return results; return results;
} }
@Override @Override
public boolean add(String key, Object value) { public boolean add(CharSequence key, Object value) {
// Not supported. // Not supported.
return false; return false;
} }
@Override @Override
public Float get(String key) { public Object get(CharSequence key) {
Integer bucket = normalCompletion.getBucket(key); Integer bucket = normalCompletion.getBucket(key);
if (bucket == null) if (bucket == null)
return null; return null;

View File

@ -33,8 +33,10 @@ import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.Builder; import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc; import org.apache.lucene.util.fst.FST.Arc;
@ -121,7 +123,7 @@ public class WFSTCompletionLookup extends Lookup {
output.reset(buffer); output.reset(buffer);
output.writeBytes(spare.bytes, spare.offset, spare.length); output.writeBytes(spare.bytes, spare.offset, spare.length);
output.writeByte((byte)0); // separator: not used, just for sort order output.writeByte((byte)0); // separator: not used, just for sort order
output.writeInt((int)encodeWeight(iterator.freq())); output.writeInt((int)encodeWeight(iterator.weight()));
writer.write(buffer, 0, output.getPosition()); writer.write(buffer, 0, output.getPosition());
} }
writer.close(); writer.close();
@ -200,7 +202,7 @@ public class WFSTCompletionLookup extends Lookup {
} }
@Override @Override
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) { public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
assert num > 0; assert num > 0;
BytesRef scratch = new BytesRef(key); BytesRef scratch = new BytesRef(key);
int prefixLength = scratch.length; int prefixLength = scratch.length;
@ -217,8 +219,11 @@ public class WFSTCompletionLookup extends Lookup {
} }
List<LookupResult> results = new ArrayList<LookupResult>(num); List<LookupResult> results = new ArrayList<LookupResult>(num);
CharsRef spare = new CharsRef();
if (exactFirst && arc.isFinal()) { if (exactFirst && arc.isFinal()) {
results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + arc.nextFinalOutput))); spare.grow(scratch.length);
UnicodeUtil.UTF8toUTF16(scratch, spare);
results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
if (--num == 0) { if (--num == 0) {
return results; // that was quick return results; // that was quick
} }
@ -236,8 +241,9 @@ public class WFSTCompletionLookup extends Lookup {
// append suffix // append suffix
Util.toBytesRef(completion.input, suffix); Util.toBytesRef(completion.input, suffix);
scratch.append(suffix); scratch.append(suffix);
spare.grow(scratch.length);
results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + completion.output))); UnicodeUtil.UTF8toUTF16(scratch, spare);
results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + completion.output)));
} }
return results; return results;
} }
@ -264,7 +270,7 @@ public class WFSTCompletionLookup extends Lookup {
} }
@Override @Override
public boolean add(String key, Object value) { public boolean add(CharSequence key, Object value) {
return false; // Not supported. return false; // Not supported.
} }
@ -273,7 +279,7 @@ public class WFSTCompletionLookup extends Lookup {
* or null if it does not exist. * or null if it does not exist.
*/ */
@Override @Override
public Float get(String key) { public Object get(CharSequence key) {
Arc<Long> arc = new Arc<Long>(); Arc<Long> arc = new Arc<Long>();
Long result = null; Long result = null;
try { try {

View File

@ -28,7 +28,6 @@ import java.io.OutputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper; import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
@ -45,7 +44,7 @@ public class JaspellLookup extends Lookup {
@Override @Override
public void build(TermFreqIterator tfit) throws IOException { public void build(TermFreqIterator tfit) throws IOException {
if (tfit instanceof SortedIterator) { if (tfit.getComparator() != null) {
// make sure it's unsorted // make sure it's unsorted
// WTF - this could result in yet another sorted iteration.... // WTF - this could result in yet another sorted iteration....
tfit = new UnsortedTermFreqIteratorWrapper(tfit); tfit = new UnsortedTermFreqIteratorWrapper(tfit);
@ -56,7 +55,7 @@ public class JaspellLookup extends Lookup {
final CharsRef charsSpare = new CharsRef(); final CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) { while ((spare = tfit.next()) != null) {
float freq = tfit.freq(); float freq = tfit.weight();
if (spare.length == 0) { if (spare.length == 0) {
continue; continue;
} }
@ -67,19 +66,19 @@ public class JaspellLookup extends Lookup {
} }
@Override @Override
public boolean add(String key, Object value) { public boolean add(CharSequence key, Object value) {
trie.put(key, value); trie.put(key, value);
// XXX // XXX
return false; return false;
} }
@Override @Override
public Object get(String key) { public Object get(CharSequence key) {
return trie.get(key); return trie.get(key);
} }
@Override @Override
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) { public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
List<LookupResult> res = new ArrayList<LookupResult>(); List<LookupResult> res = new ArrayList<LookupResult>();
List<String> list; List<String> list;
int count = onlyMorePopular ? num * 2 : num; int count = onlyMorePopular ? num * 2 : num;
@ -97,7 +96,7 @@ public class JaspellLookup extends Lookup {
LookupPriorityQueue queue = new LookupPriorityQueue(num); LookupPriorityQueue queue = new LookupPriorityQueue(num);
for (String s : list) { for (String s : list) {
float freq = (Float)trie.get(s); float freq = (Float)trie.get(s);
queue.insertWithOverflow(new LookupResult(s, freq)); queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
} }
for (LookupResult lr : queue.getResults()) { for (LookupResult lr : queue.getResults()) {
res.add(lr); res.add(lr);
@ -106,7 +105,7 @@ public class JaspellLookup extends Lookup {
for (int i = 0; i < maxCnt; i++) { for (int i = 0; i < maxCnt; i++) {
String s = list.get(i); String s = list.get(i);
float freq = (Float)trie.get(s); float freq = (Float)trie.get(s);
res.add(new LookupResult(s, freq)); res.add(new LookupResult(new CharsRef(s), freq));
} }
} }
return res; return res;

View File

@ -368,8 +368,8 @@ public class JaspellTernarySearchTrie {
* A <code>String</code> index. * A <code>String</code> index.
*@return The object retrieved from the Ternary Search Trie. *@return The object retrieved from the Ternary Search Trie.
*/ */
public Object get(String key) { public Object get(CharSequence key) {
TSTNode node = getNode(key.trim().toLowerCase()); TSTNode node = getNode(key);
if (node == null) { if (node == null) {
return null; return null;
} }
@ -435,7 +435,7 @@ public class JaspellTernarySearchTrie {
*@return The node object indexed by key. This object is an instance of an *@return The node object indexed by key. This object is an instance of an
* inner class named <code>TernarySearchTrie.TSTNode</code>. * inner class named <code>TernarySearchTrie.TSTNode</code>.
*/ */
public TSTNode getNode(String key) { public TSTNode getNode(CharSequence key) {
return getNode(key, rootNode); return getNode(key, rootNode);
} }
@ -443,15 +443,14 @@ public class JaspellTernarySearchTrie {
* Returns the node indexed by key, or <code>null</code> if that node doesn't * Returns the node indexed by key, or <code>null</code> if that node doesn't
* exist. The search begins at root node. * exist. The search begins at root node.
* *
*@param key2 *@param key
* A <code>String</code> that indexes the node that is returned. * A <code>String</code> that indexes the node that is returned.
*@param startNode *@param startNode
* The top node defining the subtrie to be searched. * The top node defining the subtrie to be searched.
*@return The node object indexed by key. This object is an instance of an *@return The node object indexed by key. This object is an instance of an
* inner class named <code>TernarySearchTrie.TSTNode</code>. * inner class named <code>TernarySearchTrie.TSTNode</code>.
*/ */
protected TSTNode getNode(String key2, TSTNode startNode) { protected TSTNode getNode(CharSequence key, TSTNode startNode) {
String key = key2.trim().toLowerCase();
if (key == null || startNode == null || key.length() == 0) { if (key == null || startNode == null || key.length() == 0) {
return null; return null;
} }
@ -490,7 +489,7 @@ public class JaspellTernarySearchTrie {
*@exception IllegalArgumentException *@exception IllegalArgumentException
* If the key is an empty <code>String</code>. * If the key is an empty <code>String</code>.
*/ */
protected TSTNode getOrCreateNode(String key) throws NullPointerException, protected TSTNode getOrCreateNode(CharSequence key) throws NullPointerException,
IllegalArgumentException { IllegalArgumentException {
if (key == null) { if (key == null) {
throw new NullPointerException( throw new NullPointerException(
@ -568,7 +567,7 @@ public class JaspellTernarySearchTrie {
* The maximum number of values returned by this method. * The maximum number of values returned by this method.
*@return A <code>List</code> with the results *@return A <code>List</code> with the results
*/ */
public List<String> matchAlmost(String key, int numReturnValues) { public List<String> matchAlmost(CharSequence key, int numReturnValues) {
return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key, return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key,
((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>(), false); ((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>(), false);
} }
@ -598,7 +597,7 @@ public class JaspellTernarySearchTrie {
*@return A <code>List</code> with the results. *@return A <code>List</code> with the results.
*/ */
private List<String> matchAlmostRecursion(TSTNode currentNode, int charIndex, private List<String> matchAlmostRecursion(TSTNode currentNode, int charIndex,
int d, String matchAlmostKey, int matchAlmostNumReturnValues, int d, CharSequence matchAlmostKey, int matchAlmostNumReturnValues,
List<String> matchAlmostResult2, boolean upTo) { List<String> matchAlmostResult2, boolean upTo) {
if ((currentNode == null) if ((currentNode == null)
|| (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues) || (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues)
@ -658,7 +657,7 @@ public class JaspellTernarySearchTrie {
* The maximum number of values returned from this method. * The maximum number of values returned from this method.
*@return A <code>List</code> with the results *@return A <code>List</code> with the results
*/ */
public List<String> matchPrefix(String prefix, int numReturnValues) { public List<String> matchPrefix(CharSequence prefix, int numReturnValues) {
Vector<String> sortKeysResult = new Vector<String>(); Vector<String> sortKeysResult = new Vector<String>();
TSTNode startNode = getNode(prefix); TSTNode startNode = getNode(prefix);
if (startNode == null) { if (startNode == null) {
@ -722,8 +721,8 @@ public class JaspellTernarySearchTrie {
*@param value *@param value
* The object to be stored in the Trie. * The object to be stored in the Trie.
*/ */
public void put(String key, Object value) { public void put(CharSequence key, Object value) {
getOrCreateNode(key.trim().toLowerCase()).data = value; getOrCreateNode(key).data = value;
} }
/** /**

View File

@ -57,7 +57,7 @@ public class TSTAutocomplete {
* index of character in key to be inserted currently. * index of character in key to be inserted currently.
* @return currentNode The new reference to root node of TST * @return currentNode The new reference to root node of TST
*/ */
public TernaryTreeNode insert(TernaryTreeNode currentNode, String s, public TernaryTreeNode insert(TernaryTreeNode currentNode, CharSequence s,
Object val, int x) { Object val, int x) {
if (s == null || s.length() <= x) { if (s == null || s.length() <= x) {
return currentNode; return currentNode;
@ -69,7 +69,7 @@ public class TSTAutocomplete {
if (x < s.length() - 1) { if (x < s.length() - 1) {
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1); currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
} else { } else {
currentNode.token = s; currentNode.token = s.toString();
currentNode.val = val; currentNode.val = val;
return currentNode; return currentNode;
} }
@ -79,7 +79,7 @@ public class TSTAutocomplete {
if (x < s.length() - 1) { if (x < s.length() - 1) {
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1); currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
} else { } else {
currentNode.token = s; currentNode.token = s.toString();
currentNode.val = val; currentNode.val = val;
return currentNode; return currentNode;
} }
@ -104,7 +104,7 @@ public class TSTAutocomplete {
* @return suggest list of auto-completed keys for the given prefix query. * @return suggest list of auto-completed keys for the given prefix query.
*/ */
public ArrayList<TernaryTreeNode> prefixCompletion(TernaryTreeNode root, public ArrayList<TernaryTreeNode> prefixCompletion(TernaryTreeNode root,
String s, int x) { CharSequence s, int x) {
TernaryTreeNode p = root; TernaryTreeNode p = root;
ArrayList<TernaryTreeNode> suggest = new ArrayList<TernaryTreeNode>(); ArrayList<TernaryTreeNode> suggest = new ArrayList<TernaryTreeNode>();

View File

@ -30,7 +30,6 @@ import java.util.List;
import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
@ -45,7 +44,7 @@ public class TSTLookup extends Lookup {
public void build(TermFreqIterator tfit) throws IOException { public void build(TermFreqIterator tfit) throws IOException {
root = new TernaryTreeNode(); root = new TernaryTreeNode();
// buffer first // buffer first
if ((!(tfit instanceof SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) { if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
// make sure it's sorted and the comparator uses UTF16 sort order // make sure it's sorted and the comparator uses UTF16 sort order
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
} }
@ -58,34 +57,47 @@ public class TSTLookup extends Lookup {
charsSpare.grow(spare.length); charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare); UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
tokens.add(charsSpare.toString()); tokens.add(charsSpare.toString());
vals.add(new Float(tfit.freq())); vals.add(new Float(tfit.weight()));
} }
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root); autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
} }
@Override @Override
public boolean add(String key, Object value) { public boolean add(CharSequence key, Object value) {
autocomplete.insert(root, key, value, 0); autocomplete.insert(root, key, value, 0);
// XXX we don't know if a new node was created // XXX we don't know if a new node was created
return true; return true;
} }
@Override @Override
public Object get(String key) { public Object get(CharSequence key) {
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0); List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
if (list == null || list.isEmpty()) { if (list == null || list.isEmpty()) {
return null; return null;
} }
for (TernaryTreeNode n : list) { for (TernaryTreeNode n : list) {
if (n.token.equals(key)) { if (charSeqEquals(n.token, key)) {
return n.val; return n.val;
} }
} }
return null; return null;
} }
/**
 * Compares two character sequences for content equality, char by char.
 *
 * @param left the first sequence
 * @param right the second sequence
 * @return {@code true} if both sequences have the same length and hold the
 *         same char at every index, {@code false} otherwise
 */
private static boolean charSeqEquals(CharSequence left, CharSequence right) {
  final int length = left.length();
  if (right.length() != length) {
    return false;
  }
  int pos = 0;
  // Advance while the sequences agree; stopping before the end means a mismatch.
  while (pos < length && left.charAt(pos) == right.charAt(pos)) {
    pos++;
  }
  return pos == length;
}
@Override @Override
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) { public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0); List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
List<LookupResult> res = new ArrayList<LookupResult>(); List<LookupResult> res = new ArrayList<LookupResult>();
if (list == null || list.size() == 0) { if (list == null || list.size() == 0) {

View File

@ -97,7 +97,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
while ((line = br.readLine()) != null) { while ((line = br.readLine()) != null) {
int tab = line.indexOf('|'); int tab = line.indexOf('|');
assertTrue("No | separator?: " + line, tab >= 0); assertTrue("No | separator?: " + line, tab >= 0);
float weight = Float.parseFloat(line.substring(tab + 1)); int weight = Integer.parseInt(line.substring(tab + 1));
String key = line.substring(0, tab); String key = line.substring(0, tab);
input.add(new TermFreq(key, weight)); input.add(new TermFreq(key, weight));
} }

View File

@ -23,6 +23,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
import org.apache.lucene.search.suggest.jaspell.JaspellLookup; import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup; import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class PersistenceTest extends LuceneTestCase { public class PersistenceTest extends LuceneTestCase {
public final String[] keys = new String[] { public final String[] keys = new String[] {
@ -61,7 +62,7 @@ public class PersistenceTest extends LuceneTestCase {
Lookup lookup = lookupClass.newInstance(); Lookup lookup = lookupClass.newInstance();
TermFreq[] keys = new TermFreq[this.keys.length]; TermFreq[] keys = new TermFreq[this.keys.length];
for (int i = 0; i < keys.length; i++) for (int i = 0; i < keys.length; i++)
keys[i] = new TermFreq(this.keys[i], (float) i); keys[i] = new TermFreq(this.keys[i], i);
lookup.build(new TermFreqArrayIterator(keys)); lookup.build(new TermFreqArrayIterator(keys));
// Store the suggester. // Store the suggester.
@ -75,7 +76,7 @@ public class PersistenceTest extends LuceneTestCase {
// Assert validity. // Assert validity.
float previous = Float.NEGATIVE_INFINITY; float previous = Float.NEGATIVE_INFINITY;
for (TermFreq k : keys) { for (TermFreq k : keys) {
Float val = (Float) lookup.get(k.term.utf8ToString()); Float val = (Float) lookup.get(_TestUtil.bytesToCharSequence(k.term, random));
assertNotNull(k.term.utf8ToString(), val); assertNotNull(k.term.utf8ToString(), val);
if (supportsExactWeights) { if (supportsExactWeights) {

View File

@ -21,13 +21,13 @@ import org.apache.lucene.util.BytesRef;
public final class TermFreq { public final class TermFreq {
public final BytesRef term; public final BytesRef term;
public final float v; public final long v;
public TermFreq(String term, float v) { public TermFreq(String term, long v) {
this(new BytesRef(term), v); this(new BytesRef(term), v);
} }
public TermFreq(BytesRef term, float v) { public TermFreq(BytesRef term, long v) {
this.term = term; this.term = term;
this.v = v; this.v = v;
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqIterator;
@ -44,7 +45,7 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
this(i.iterator()); this(i.iterator());
} }
public float freq() { public long weight() {
return current.v; return current.v;
} }
@ -57,4 +58,9 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
} }
return null; return null;
} }
/**
 * Always returns {@code null}; this iterator reports no comparator.
 * NOTE(review): presumably a null comparator tells callers the terms arrive
 * in no guaranteed sort order -- confirm against the TermFreqIterator contract.
 */
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
} }

View File

@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.lucene.search.suggest.BytesRefList;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;

View File

@ -23,7 +23,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.HighFrequencyDictionary; import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -37,8 +36,7 @@ public class TestHighFrequencyDictionary extends LuceneTestCase {
IndexReader ir = DirectoryReader.open(dir); IndexReader ir = DirectoryReader.open(dir);
Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f); Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
BytesRefIterator tf = dictionary.getWordsIterator(); BytesRefIterator tf = dictionary.getWordsIterator();
assertTrue(tf instanceof SortedIterator); assertNull(tf.getComparator());
((SortedIterator)tf).comparator();
assertNull(tf.next()); assertNull(tf.next());
dir.close(); dir.close();
} }

View File

@ -38,7 +38,7 @@ public class TestTermFreqIterator extends LuceneTestCase {
public void testTerms() throws Exception { public void testTerms() throws Exception {
int num = atLeast(10000); int num = atLeast(10000);
TreeMap<BytesRef,Float> sorted = new TreeMap<BytesRef,Float>(); TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>();
TermFreq[] unsorted = new TermFreq[num]; TermFreq[] unsorted = new TermFreq[num];
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
@ -46,28 +46,28 @@ public class TestTermFreqIterator extends LuceneTestCase {
do { do {
key = new BytesRef(_TestUtil.randomUnicodeString(random)); key = new BytesRef(_TestUtil.randomUnicodeString(random));
} while (sorted.containsKey(key)); } while (sorted.containsKey(key));
float value = random.nextFloat(); long value = random.nextLong();
sorted.put(key, value); sorted.put(key, value);
unsorted[i] = new TermFreq(key, value); unsorted[i] = new TermFreq(key, value);
} }
// test the sorted iterator wrapper // test the sorted iterator wrapper
TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator()); TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator());
Iterator<Map.Entry<BytesRef,Float>> expected = sorted.entrySet().iterator(); Iterator<Map.Entry<BytesRef,Long>> expected = sorted.entrySet().iterator();
while (expected.hasNext()) { while (expected.hasNext()) {
Map.Entry<BytesRef,Float> entry = expected.next(); Map.Entry<BytesRef,Long> entry = expected.next();
assertEquals(entry.getKey(), wrapper.next()); assertEquals(entry.getKey(), wrapper.next());
assertEquals(entry.getValue().floatValue(), wrapper.freq(), 0F); assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F);
} }
assertNull(wrapper.next()); assertNull(wrapper.next());
// test the unsorted iterator wrapper // test the unsorted iterator wrapper
wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted)); wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted));
TreeMap<BytesRef,Float> actual = new TreeMap<BytesRef,Float>(); TreeMap<BytesRef,Long> actual = new TreeMap<BytesRef,Long>();
BytesRef key; BytesRef key;
while ((key = wrapper.next()) != null) { while ((key = wrapper.next()) != null) {
float value = wrapper.freq(); long value = wrapper.weight();
actual.put(BytesRef.deepCopyOf(key), value); actual.put(BytesRef.deepCopyOf(key), value);
} }
assertEquals(sorted, actual); assertEquals(sorted, actual);

View File

@ -28,7 +28,7 @@ import org.apache.lucene.util.*;
* Unit tests for {@link FSTCompletion}. * Unit tests for {@link FSTCompletion}.
*/ */
public class FSTCompletionTest extends LuceneTestCase { public class FSTCompletionTest extends LuceneTestCase {
public static TermFreq tf(String t, float v) { public static TermFreq tf(String t, int v) {
return new TermFreq(t, v); return new TermFreq(t, v);
} }
@ -62,28 +62,28 @@ public class FSTCompletionTest extends LuceneTestCase {
tf("foundation", 1), tf("foundation", 1),
tf("fourblah", 1), tf("fourblah", 1),
tf("fourteen", 1), tf("fourteen", 1),
tf("four", 0f), tf("four", 0),
tf("fourier", 0f), tf("fourier", 0),
tf("fourty", 0f), tf("fourty", 0),
tf("xo", 1), tf("xo", 1),
}; };
return keys; return keys;
} }
public void testExactMatchHighPriority() throws Exception { public void testExactMatchHighPriority() throws Exception {
assertMatchEquals(completion.lookup("two", 1), assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("two", random), 1),
"two/1.0"); "two/1.0");
} }
public void testExactMatchLowPriority() throws Exception { public void testExactMatchLowPriority() throws Exception {
assertMatchEquals(completion.lookup("one", 2), assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0", "one/0.0",
"oneness/1.0"); "oneness/1.0");
} }
public void testExactMatchReordering() throws Exception { public void testExactMatchReordering() throws Exception {
// Check reordering of exact matches. // Check reordering of exact matches.
assertMatchEquals(completion.lookup("four", 4), assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4),
"four/0.0", "four/0.0",
"fourblah/1.0", "fourblah/1.0",
"fourteen/1.0", "fourteen/1.0",
@ -92,49 +92,49 @@ public class FSTCompletionTest extends LuceneTestCase {
public void testRequestedCount() throws Exception { public void testRequestedCount() throws Exception {
// 'one' is promoted after collecting two higher ranking results. // 'one' is promoted after collecting two higher ranking results.
assertMatchEquals(completion.lookup("one", 2), assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0", "one/0.0",
"oneness/1.0"); "oneness/1.0");
// 'four' is collected in a bucket and then again as an exact match. // 'four' is collected in a bucket and then again as an exact match.
assertMatchEquals(completion.lookup("four", 2), assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 2),
"four/0.0", "four/0.0",
"fourblah/1.0"); "fourblah/1.0");
// Check reordering of exact matches. // Check reordering of exact matches.
assertMatchEquals(completion.lookup("four", 4), assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4),
"four/0.0", "four/0.0",
"fourblah/1.0", "fourblah/1.0",
"fourteen/1.0", "fourteen/1.0",
"fourier/0.0"); "fourier/0.0");
// 'one' is at the top after collecting all alphabetical results. // 'one' is at the top after collecting all alphabetical results.
assertMatchEquals(completionAlphabetical.lookup("one", 2), assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0", "one/0.0",
"oneness/1.0"); "oneness/1.0");
// 'one' is not promoted after collecting two higher ranking results. // 'one' is not promoted after collecting two higher ranking results.
FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false); FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false);
assertMatchEquals(noPromotion.lookup("one", 2), assertMatchEquals(noPromotion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"oneness/1.0", "oneness/1.0",
"onerous/1.0"); "onerous/1.0");
// 'one' is at the top after collecting all alphabetical results. // 'one' is at the top after collecting all alphabetical results.
assertMatchEquals(completionAlphabetical.lookup("one", 2), assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0", "one/0.0",
"oneness/1.0"); "oneness/1.0");
} }
public void testMiss() throws Exception { public void testMiss() throws Exception {
assertMatchEquals(completion.lookup("xyz", 1)); assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("xyz", random), 1));
} }
public void testAlphabeticWithWeights() throws Exception { public void testAlphabeticWithWeights() throws Exception {
assertEquals(0, completionAlphabetical.lookup("xyz", 1).size()); assertEquals(0, completionAlphabetical.lookup(_TestUtil.stringToCharSequence("xyz", random), 1).size());
} }
public void testFullMatchList() throws Exception { public void testFullMatchList() throws Exception {
assertMatchEquals(completion.lookup("one", Integer.MAX_VALUE), assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), Integer.MAX_VALUE),
"oneness/1.0", "oneness/1.0",
"onerous/1.0", "onerous/1.0",
"onesimus/1.0", "onesimus/1.0",
@ -148,7 +148,7 @@ public class FSTCompletionTest extends LuceneTestCase {
builder.add(new BytesRef(key), 0); builder.add(new BytesRef(key), 0);
FSTCompletion lookup = builder.build(); FSTCompletion lookup = builder.build();
List<Completion> result = lookup.lookup(key, 1); List<Completion> result = lookup.lookup(_TestUtil.stringToCharSequence(key, random), 1);
assertEquals(1, result.size()); assertEquals(1, result.size());
} }
@ -158,7 +158,7 @@ public class FSTCompletionTest extends LuceneTestCase {
Random r = random; Random r = random;
List<TermFreq> keys = new ArrayList<TermFreq>(); List<TermFreq> keys = new ArrayList<TermFreq>();
for (int i = 0; i < 5000; i++) { for (int i = 0; i < 5000; i++) {
keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1.0f)); keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
} }
lookup.build(new TermFreqArrayIterator(keys)); lookup.build(new TermFreqArrayIterator(keys));
@ -167,7 +167,7 @@ public class FSTCompletionTest extends LuceneTestCase {
// are. // are.
Float previous = null; Float previous = null;
for (TermFreq tf : keys) { for (TermFreq tf : keys) {
Float current = lookup.get(tf.term.utf8ToString()); Float current = (Float)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random));
if (previous != null) { if (previous != null) {
assertEquals(previous, current); assertEquals(previous, current);
} }
@ -180,28 +180,27 @@ public class FSTCompletionTest extends LuceneTestCase {
FSTCompletionLookup lookup = new FSTCompletionLookup(); FSTCompletionLookup lookup = new FSTCompletionLookup();
lookup.build(new TermFreqArrayIterator(input)); lookup.build(new TermFreqArrayIterator(input));
for (TermFreq tf : input) { for (TermFreq tf : input) {
assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null); assertTrue("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)) != null);
assertEquals(tf.term.utf8ToString(), lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key); assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random), true, 1).get(0).key.toString());
} }
List<LookupResult> result = lookup.lookup("wit", true, 5); List<LookupResult> result = lookup.lookup(_TestUtil.stringToCharSequence("wit", random), true, 5);
assertEquals(5, result.size()); assertEquals(5, result.size());
assertTrue(result.get(0).key.equals("wit")); // exact match. assertTrue(result.get(0).key.toString().equals("wit")); // exact match.
assertTrue(result.get(1).key.equals("with")); // highest count. assertTrue(result.get(1).key.toString().equals("with")); // highest count.
} }
public void testEmptyInput() throws Exception { public void testEmptyInput() throws Exception {
completion = new FSTCompletionBuilder().build(); completion = new FSTCompletionBuilder().build();
assertMatchEquals(completion.lookup("", 10)); assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("", random), 10));
} }
public void testRandom() throws Exception { public void testRandom() throws Exception {
List<TermFreq> freqs = new ArrayList<TermFreq>(); List<TermFreq> freqs = new ArrayList<TermFreq>();
Random rnd = random; Random rnd = random;
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) { for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
float weight = rnd.nextFloat() * 100; int weight = random.nextInt(100);
freqs.add(new TermFreq("" + rnd.nextLong(), weight)); freqs.add(new TermFreq("" + rnd.nextLong(), weight));
} }
@ -212,8 +211,8 @@ public class FSTCompletionTest extends LuceneTestCase {
final String term = tf.term.utf8ToString(); final String term = tf.term.utf8ToString();
for (int i = 1; i < term.length(); i++) { for (int i = 1; i < term.length(); i++) {
String prefix = term.substring(0, i); String prefix = term.substring(0, i);
for (LookupResult lr : lookup.lookup(prefix, true, 10)) { for (LookupResult lr : lookup.lookup(_TestUtil.stringToCharSequence(prefix, random), true, 10)) {
assertTrue(lr.key.startsWith(prefix)); assertTrue(lr.key.toString().startsWith(prefix));
} }
} }
} }

View File

@ -45,33 +45,33 @@ public class WFSTCompletionTest extends LuceneTestCase {
suggester.build(new TermFreqArrayIterator(keys)); suggester.build(new TermFreqArrayIterator(keys));
// top N of 2, but only foo is available // top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup("f", false, 2); List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
assertEquals(1, results.size()); assertEquals(1, results.size());
assertEquals("foo", results.get(0).key); assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F); assertEquals(50, results.get(0).value, 0.01F);
// top N of 1 for 'bar': we return this even though barbar is higher // top N of 1 for 'bar': we return this even though barbar is higher
results = suggester.lookup("bar", false, 1); results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random), false, 1);
assertEquals(1, results.size()); assertEquals(1, results.size());
assertEquals("bar", results.get(0).key); assertEquals("bar", results.get(0).key.toString());
assertEquals(10, results.get(0).value, 0.01F); assertEquals(10, results.get(0).value, 0.01F);
// top N Of 2 for 'b' // top N Of 2 for 'b'
results = suggester.lookup("b", false, 2); results = suggester.lookup(_TestUtil.stringToCharSequence("b", random), false, 2);
assertEquals(2, results.size()); assertEquals(2, results.size());
assertEquals("barbar", results.get(0).key); assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F); assertEquals(12, results.get(0).value, 0.01F);
assertEquals("bar", results.get(1).key); assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F); assertEquals(10, results.get(1).value, 0.01F);
// top N of 3 for 'ba' // top N of 3 for 'ba'
results = suggester.lookup("ba", false, 3); results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random), false, 3);
assertEquals(3, results.size()); assertEquals(3, results.size());
assertEquals("barbar", results.get(0).key); assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F); assertEquals(12, results.get(0).value, 0.01F);
assertEquals("bar", results.get(1).key); assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F); assertEquals(10, results.get(1).value, 0.01F);
assertEquals("barbara", results.get(2).key); assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F); assertEquals(6, results.get(2).value, 0.01F);
} }
@ -100,7 +100,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
// we can probably do Integer.MAX_VALUE here, but why worry. // we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random.nextInt(1<<24); int weight = random.nextInt(1<<24);
slowCompletor.put(s, (long)weight); slowCompletor.put(s, (long)weight);
keys[i] = new TermFreq(s, (float) weight); keys[i] = new TermFreq(s, weight);
} }
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
@ -109,7 +109,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
for (String prefix : allPrefixes) { for (String prefix : allPrefixes) {
final int topN = _TestUtil.nextInt(random, 1, 10); final int topN = _TestUtil.nextInt(random, 1, 10);
List<LookupResult> r = suggester.lookup(prefix, false, topN); List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random), false, topN);
// 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion // 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
final List<LookupResult> matches = new ArrayList<LookupResult>(); final List<LookupResult> matches = new ArrayList<LookupResult>();
@ -126,7 +126,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
public int compare(LookupResult left, LookupResult right) { public int compare(LookupResult left, LookupResult right) {
int cmp = Float.compare(right.value, left.value); int cmp = Float.compare(right.value, left.value);
if (cmp == 0) { if (cmp == 0) {
return left.key.compareTo(right.key); return left.compareTo(right);
} else { } else {
return cmp; return cmp;
} }
@ -140,7 +140,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
for(int hit=0;hit<r.size();hit++) { for(int hit=0;hit<r.size();hit++) {
//System.out.println(" check hit " + hit); //System.out.println(" check hit " + hit);
assertEquals(matches.get(hit).key, r.get(hit).key); assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
assertEquals(matches.get(hit).value, r.get(hit).value, 0f); assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
} }
} }

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.search.suggest.FileDictionary; import org.apache.lucene.search.suggest.FileDictionary;
import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.util.CharsRef;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
@ -152,7 +153,7 @@ public class Suggester extends SolrSpellChecker {
build(core, searcher); build(core, searcher);
} }
public void add(String query, int numHits) { public void add(CharsRef query, int numHits) {
LOG.info("add " + query + ", " + numHits); LOG.info("add " + query + ", " + numHits);
lookup.add(query, new Integer(numHits)); lookup.add(query, new Integer(numHits));
} }
@ -167,9 +168,12 @@ public class Suggester extends SolrSpellChecker {
return EMPTY_RESULT; return EMPTY_RESULT;
} }
SpellingResult res = new SpellingResult(); SpellingResult res = new SpellingResult();
CharsRef scratch = new CharsRef();
for (Token t : options.tokens) { for (Token t : options.tokens) {
String term = new String(t.buffer(), 0, t.length()); scratch.chars = t.buffer();
List<LookupResult> suggestions = lookup.lookup(term, scratch.offset = 0;
scratch.length = t.length();
List<LookupResult> suggestions = lookup.lookup(scratch,
options.onlyMorePopular, options.count); options.onlyMorePopular, options.count);
if (suggestions == null) { if (suggestions == null) {
continue; continue;
@ -178,7 +182,7 @@ public class Suggester extends SolrSpellChecker {
Collections.sort(suggestions); Collections.sort(suggestions);
} }
for (LookupResult lr : suggestions) { for (LookupResult lr : suggestions) {
res.add(t, lr.key, ((Number)lr.value).intValue()); res.add(t, lr.key.toString(), ((Number)lr.value).intValue());
} }
} }
return res; return res;