mirror of https://github.com/apache/lucene.git
LUCENE-3807: Clean up Suggest API
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293148 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
61387fe283
commit
f29eda768d
|
@ -387,7 +387,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() throws IOException {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -654,7 +654,7 @@ public class DocTermOrds {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() throws IOException {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return termsEnum.getComparator();
|
return termsEnum.getComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -174,7 +174,7 @@ public class FilterAtomicReader extends AtomicReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() throws IOException {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return in.getComparator();
|
return in.getComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -122,7 +122,7 @@ public abstract class FilteredTermsEnum extends TermsEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() throws IOException {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return tenum.getComparator();
|
return tenum.getComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -179,13 +179,6 @@ public abstract class TermsEnum implements BytesRefIterator {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return the {@link BytesRef} Comparator used to sort
|
|
||||||
* terms provided by the iterator. This may return
|
|
||||||
* null if there are no terms. Callers may invoke this
|
|
||||||
* method many times, so it's best to cache a single
|
|
||||||
* instance & reuse it. */
|
|
||||||
public abstract Comparator<BytesRef> getComparator() throws IOException;
|
|
||||||
|
|
||||||
/** An empty TermsEnum for quickly returning an empty instance e.g.
|
/** An empty TermsEnum for quickly returning an empty instance e.g.
|
||||||
* in {@link org.apache.lucene.search.MultiTermQuery}
|
* in {@link org.apache.lucene.search.MultiTermQuery}
|
||||||
|
|
|
@ -1052,7 +1052,7 @@ class FieldCacheImpl implements FieldCache {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() throws IOException {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -287,7 +287,7 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() throws IOException {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return actualEnum.getComparator();
|
return actualEnum.getComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -233,13 +233,7 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
|
||||||
final byte[] bBytes = b.bytes;
|
final byte[] bBytes = b.bytes;
|
||||||
int bUpto = b.offset;
|
int bUpto = b.offset;
|
||||||
|
|
||||||
final int aStop;
|
final int aStop = aUpto + Math.min(a.length, b.length);
|
||||||
if (a.length < b.length) {
|
|
||||||
aStop = aUpto + a.length;
|
|
||||||
} else {
|
|
||||||
aStop = aUpto + b.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
while(aUpto < aStop) {
|
while(aUpto < aStop) {
|
||||||
int aByte = aBytes[aUpto++] & 0xff;
|
int aByte = aBytes[aUpto++] & 0xff;
|
||||||
int bByte = bBytes[bUpto++] & 0xff;
|
int bByte = bBytes[bUpto++] & 0xff;
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.util;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A simple iterator interface for {@link BytesRef} iteration
|
* A simple iterator interface for {@link BytesRef} iteration
|
||||||
|
@ -40,6 +41,14 @@ public interface BytesRefIterator {
|
||||||
*/
|
*/
|
||||||
public BytesRef next() throws IOException;
|
public BytesRef next() throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the {@link BytesRef} Comparator used to sort terms provided by the
|
||||||
|
* iterator. This may return null if there are no items or the iterator is not
|
||||||
|
* sorted. Callers may invoke this method many times, so it's best to cache a
|
||||||
|
* single instance & reuse it.
|
||||||
|
*/
|
||||||
|
public Comparator<BytesRef> getComparator();
|
||||||
|
|
||||||
public final static class EmptyBytesRefIterator implements BytesRefIterator {
|
public final static class EmptyBytesRefIterator implements BytesRefIterator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -47,6 +56,10 @@ public interface BytesRefIterator {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Comparator<BytesRef> getComparator() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
import java.util.Enumeration;
|
import java.util.Enumeration;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -707,4 +708,23 @@ public class _TestUtil {
|
||||||
}
|
}
|
||||||
return termsEnum.docs(liveDocs, null, needsFreqs);
|
return termsEnum.docs(liveDocs, null, needsFreqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static CharSequence stringToCharSequence(String string, Random random) {
|
||||||
|
return bytesToCharSequence(new BytesRef(string), random);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CharSequence bytesToCharSequence(BytesRef ref, Random random) {
|
||||||
|
switch(random.nextInt(5)) {
|
||||||
|
case 4:
|
||||||
|
CharsRef chars = new CharsRef(ref.length);
|
||||||
|
UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, chars);
|
||||||
|
return chars;
|
||||||
|
case 3:
|
||||||
|
return CharBuffer.wrap(ref.utf8ToString());
|
||||||
|
default:
|
||||||
|
return ref.utf8ToString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ package org.apache.lucene.search.spell;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import org.apache.lucene.util.BytesRefIterator;
|
import org.apache.lucene.util.BytesRefIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -30,5 +31,5 @@ public interface Dictionary {
|
||||||
* Return all words present in the dictionary
|
* Return all words present in the dictionary
|
||||||
* @return Iterator
|
* @return Iterator
|
||||||
*/
|
*/
|
||||||
BytesRefIterator getWordsIterator();
|
BytesRefIterator getWordsIterator() throws IOException;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,16 +19,13 @@ package org.apache.lucene.search.spell;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.MultiFields;
|
import org.apache.lucene.index.MultiFields;
|
||||||
import org.apache.lucene.util.BytesRefIterator;
|
import org.apache.lucene.util.BytesRefIterator;
|
||||||
import org.apache.lucene.util.CharsRef;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HighFrequencyDictionary: terms taken from the given field
|
* HighFrequencyDictionary: terms taken from the given field
|
||||||
|
@ -44,7 +41,6 @@ public class HighFrequencyDictionary implements Dictionary {
|
||||||
private IndexReader reader;
|
private IndexReader reader;
|
||||||
private String field;
|
private String field;
|
||||||
private float thresh;
|
private float thresh;
|
||||||
private final CharsRef spare = new CharsRef();
|
|
||||||
|
|
||||||
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
|
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
|
@ -52,66 +48,55 @@ public class HighFrequencyDictionary implements Dictionary {
|
||||||
this.thresh = thresh;
|
this.thresh = thresh;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final BytesRefIterator getWordsIterator() {
|
public final BytesRefIterator getWordsIterator() throws IOException {
|
||||||
return new HighFrequencyIterator();
|
return new HighFrequencyIterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
|
final class HighFrequencyIterator implements TermFreqIterator {
|
||||||
private final BytesRef spare = new BytesRef();
|
private final BytesRef spare = new BytesRef();
|
||||||
private final TermsEnum termsEnum;
|
private final TermsEnum termsEnum;
|
||||||
private int minNumDocs;
|
private int minNumDocs;
|
||||||
|
private long freq;
|
||||||
|
|
||||||
HighFrequencyIterator() {
|
HighFrequencyIterator() throws IOException {
|
||||||
try {
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
Terms terms = MultiFields.getTerms(reader, field);
|
if (terms != null) {
|
||||||
if (terms != null) {
|
termsEnum = terms.iterator(null);
|
||||||
termsEnum = terms.iterator(null);
|
} else {
|
||||||
} else {
|
termsEnum = null;
|
||||||
termsEnum = null;
|
|
||||||
}
|
|
||||||
minNumDocs = (int)(thresh * (float)reader.numDocs());
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
}
|
||||||
|
minNumDocs = (int)(thresh * (float)reader.numDocs());
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isFrequent(int freq) {
|
private boolean isFrequent(int freq) {
|
||||||
return freq >= minNumDocs;
|
return freq >= minNumDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
public float freq() {
|
public long weight() {
|
||||||
try {
|
return freq;
|
||||||
return termsEnum.docFreq();
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
throw new RuntimeException(ioe);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef next() throws IOException {
|
public BytesRef next() throws IOException {
|
||||||
if (termsEnum != null) {
|
if (termsEnum != null) {
|
||||||
BytesRef next;
|
BytesRef next;
|
||||||
while ((next = termsEnum.next()) != null) {
|
while((next = termsEnum.next()) != null) {
|
||||||
if (isFrequent(termsEnum.docFreq())) {
|
if (isFrequent(termsEnum.docFreq())) {
|
||||||
|
freq = termsEnum.docFreq();
|
||||||
spare.copyBytes(next);
|
spare.copyBytes(next);
|
||||||
return spare;
|
return spare;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> comparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
try {
|
if (termsEnum == null) {
|
||||||
if (termsEnum == null) {
|
return null;
|
||||||
return null;
|
} else {
|
||||||
} else {
|
return termsEnum.getComparator();
|
||||||
return termsEnum.getComparator();
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,17 +43,12 @@ public class LuceneDictionary implements Dictionary {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final BytesRefIterator getWordsIterator() {
|
public final BytesRefIterator getWordsIterator() throws IOException {
|
||||||
|
final Terms terms = MultiFields.getTerms(reader, field);
|
||||||
try {
|
if (terms != null) {
|
||||||
final Terms terms = MultiFields.getTerms(reader, field);
|
return terms.iterator(null);
|
||||||
if (terms != null) {
|
} else {
|
||||||
return terms.iterator(null);
|
return BytesRefIterator.EMPTY_ITERATOR;
|
||||||
} else {
|
|
||||||
return BytesRefIterator.EMPTY_ITERATOR;
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.search.spell;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Comparator;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -53,7 +53,7 @@ public class PlainTextDictionary implements Dictionary {
|
||||||
in = new BufferedReader(reader);
|
in = new BufferedReader(reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
public BytesRefIterator getWordsIterator() {
|
public BytesRefIterator getWordsIterator() throws IOException {
|
||||||
return new FileIterator();
|
return new FileIterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,6 +85,11 @@ public class PlainTextDictionary implements Dictionary {
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,33 +0,0 @@
|
||||||
package org.apache.lucene.search.spell;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Marker interface to signal that elements coming from {@link Iterator}
|
|
||||||
* come in ascending lexicographic order.
|
|
||||||
*/
|
|
||||||
public interface SortedIterator {
|
|
||||||
|
|
||||||
public Comparator<BytesRef> comparator();
|
|
||||||
|
|
||||||
}
|
|
|
@ -18,12 +18,14 @@ package org.apache.lucene.search.spell;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefIterator;
|
import org.apache.lucene.util.BytesRefIterator;
|
||||||
|
|
||||||
public interface TermFreqIterator extends BytesRefIterator {
|
public interface TermFreqIterator extends BytesRefIterator {
|
||||||
|
|
||||||
public float freq();
|
public long weight();
|
||||||
|
|
||||||
public static class TermFreqIteratorWrapper implements TermFreqIterator {
|
public static class TermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
private BytesRefIterator wrapped;
|
private BytesRefIterator wrapped;
|
||||||
|
@ -32,12 +34,17 @@ public interface TermFreqIterator extends BytesRefIterator {
|
||||||
this.wrapped = wrapped;
|
this.wrapped = wrapped;
|
||||||
}
|
}
|
||||||
|
|
||||||
public float freq() {
|
public long weight() {
|
||||||
return 1.0f;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BytesRef next() throws IOException {
|
public BytesRef next() throws IOException {
|
||||||
return wrapped.next();
|
return wrapped.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() {
|
||||||
|
return wrapped.getComparator();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.search.suggest;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -27,12 +27,14 @@ import org.apache.lucene.util.BytesRef;
|
||||||
* This wrapper buffers incoming elements.
|
* This wrapper buffers incoming elements.
|
||||||
*/
|
*/
|
||||||
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
|
// TODO keep this for now
|
||||||
protected BytesRefList entries = new BytesRefList();
|
protected BytesRefList entries = new BytesRefList();
|
||||||
protected int curPos = -1;
|
protected int curPos = -1;
|
||||||
protected float[] freqs = new float[1];
|
protected long[] freqs = new long[1];
|
||||||
private final BytesRef spare = new BytesRef();
|
private final BytesRef spare = new BytesRef();
|
||||||
|
private final Comparator<BytesRef> comp;
|
||||||
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||||
|
this.comp = source.getComparator();
|
||||||
BytesRef spare;
|
BytesRef spare;
|
||||||
int freqIndex = 0;
|
int freqIndex = 0;
|
||||||
while((spare = source.next()) != null) {
|
while((spare = source.next()) != null) {
|
||||||
|
@ -40,12 +42,12 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
if (freqIndex >= freqs.length) {
|
if (freqIndex >= freqs.length) {
|
||||||
freqs = ArrayUtil.grow(freqs, freqs.length+1);
|
freqs = ArrayUtil.grow(freqs, freqs.length+1);
|
||||||
}
|
}
|
||||||
freqs[freqIndex++] = source.freq();
|
freqs[freqIndex++] = source.weight();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public float freq() {
|
public long weight() {
|
||||||
return freqs[curPos];
|
return freqs[curPos];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,5 +60,10 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() {
|
||||||
|
return comp;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.ByteBlockPool;
|
import org.apache.lucene.util.ByteBlockPool;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -83,6 +84,11 @@ final class BytesRefList {
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
|
||||||
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.search.spell.Dictionary;
|
import org.apache.lucene.search.spell.Dictionary;
|
||||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||||
|
@ -56,11 +57,11 @@ public class FileDictionary implements Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
final class FileIterator implements TermFreqIterator {
|
final class FileIterator implements TermFreqIterator {
|
||||||
private float curFreq;
|
private long curFreq;
|
||||||
private final BytesRef spare = new BytesRef();
|
private final BytesRef spare = new BytesRef();
|
||||||
|
|
||||||
|
|
||||||
public float freq() {
|
public long weight() {
|
||||||
return curFreq;
|
return curFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,7 +74,8 @@ public class FileDictionary implements Dictionary {
|
||||||
if (line != null) {
|
if (line != null) {
|
||||||
String[] fields = line.split("\t");
|
String[] fields = line.split("\t");
|
||||||
if (fields.length > 1) {
|
if (fields.length > 1) {
|
||||||
curFreq = Float.parseFloat(fields[1]);
|
// keep reading floats for bw compat
|
||||||
|
curFreq = (int)Float.parseFloat(fields[1]);
|
||||||
spare.copyChars(fields[0]);
|
spare.copyChars(fields[0]);
|
||||||
} else {
|
} else {
|
||||||
spare.copyChars(line);
|
spare.copyChars(line);
|
||||||
|
@ -86,6 +88,11 @@ public class FileDictionary implements Dictionary {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.search.spell.Dictionary;
|
import org.apache.lucene.search.spell.Dictionary;
|
||||||
|
@ -33,10 +34,10 @@ public abstract class Lookup {
|
||||||
* Result of a lookup.
|
* Result of a lookup.
|
||||||
*/
|
*/
|
||||||
public static final class LookupResult implements Comparable<LookupResult> {
|
public static final class LookupResult implements Comparable<LookupResult> {
|
||||||
public final String key;
|
public final CharSequence key;
|
||||||
public final float value;
|
public final float value;
|
||||||
|
|
||||||
public LookupResult(String key, float value) {
|
public LookupResult(CharSequence key, float value) {
|
||||||
this.key = key;
|
this.key = key;
|
||||||
this.value = value;
|
this.value = value;
|
||||||
}
|
}
|
||||||
|
@ -48,10 +49,32 @@ public abstract class Lookup {
|
||||||
|
|
||||||
/** Compare alphabetically. */
|
/** Compare alphabetically. */
|
||||||
public int compareTo(LookupResult o) {
|
public int compareTo(LookupResult o) {
|
||||||
return this.key.compareTo(o.key);
|
return CHARSEQUENCE_COMPARATOR.compare(key, o.key);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final Comparator<CharSequence> CHARSEQUENCE_COMPARATOR = new CharSequenceComparator();
|
||||||
|
|
||||||
|
private static class CharSequenceComparator implements Comparator<CharSequence> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(CharSequence o1, CharSequence o2) {
|
||||||
|
final int l1 = o1.length();
|
||||||
|
final int l2 = o2.length();
|
||||||
|
|
||||||
|
final int aStop = Math.min(l1, l2);
|
||||||
|
for (int i = 0; i < aStop; i++) {
|
||||||
|
int diff = o1.charAt(i) - o2.charAt(i);
|
||||||
|
if (diff != 0) {
|
||||||
|
return diff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// One is a prefix of the other, or, they are equal:
|
||||||
|
return l1 - l2;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
|
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
|
||||||
|
|
||||||
public LookupPriorityQueue(int size) {
|
public LookupPriorityQueue(int size) {
|
||||||
|
@ -99,8 +122,7 @@ public abstract class Lookup {
|
||||||
* @param num maximum number of results to return
|
* @param num maximum number of results to return
|
||||||
* @return a list of possible completions, with their relative weight (e.g. popularity)
|
* @return a list of possible completions, with their relative weight (e.g. popularity)
|
||||||
*/
|
*/
|
||||||
// TODO: this should be a BytesRef API?
|
public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num);
|
||||||
public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Modify the lookup data by recording additional data. Optional operation.
|
* Modify the lookup data by recording additional data. Optional operation.
|
||||||
|
@ -109,16 +131,14 @@ public abstract class Lookup {
|
||||||
* @return true if new key is added, false if it already exists or operation
|
* @return true if new key is added, false if it already exists or operation
|
||||||
* is not supported.
|
* is not supported.
|
||||||
*/
|
*/
|
||||||
// TODO: this should be a BytesRef API?
|
public abstract boolean add(CharSequence key, Object value);
|
||||||
public abstract boolean add(String key, Object value);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get value associated with a specific key.
|
* Get value associated with a specific key.
|
||||||
* @param key lookup key
|
* @param key lookup key
|
||||||
* @return associated value
|
* @return associated value
|
||||||
*/
|
*/
|
||||||
// TODO: this should be a BytesRef API?
|
public abstract Object get(CharSequence key);
|
||||||
public abstract Object get(String key);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Persist the constructed lookup data to a directory. Optional operation.
|
* Persist the constructed lookup data to a directory. Optional operation.
|
||||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.search.suggest;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.search.spell.SortedIterator;
|
|
||||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
@ -28,13 +27,12 @@ import org.apache.lucene.util.BytesRef;
|
||||||
* This wrapper buffers incoming elements and makes sure they are sorted in
|
* This wrapper buffers incoming elements and makes sure they are sorted in
|
||||||
* ascending lexicographic order.
|
* ascending lexicographic order.
|
||||||
*/
|
*/
|
||||||
public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator {
|
public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
|
||||||
|
// TODO keep this for now - but the consumer should really sort this stuff on disk with sorter...
|
||||||
private final int[] sortedOrds;
|
private final int[] sortedOrds;
|
||||||
private int currentOrd = -1;
|
private int currentOrd = -1;
|
||||||
private final BytesRef spare = new BytesRef();
|
private final BytesRef spare = new BytesRef();
|
||||||
private final Comparator<BytesRef> comp;
|
private final Comparator<BytesRef> comp;
|
||||||
|
|
||||||
|
|
||||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
|
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
|
||||||
super(source);
|
super(source);
|
||||||
|
@ -43,7 +41,7 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float freq() {
|
public long weight() {
|
||||||
return freqs[currentOrd];
|
return freqs[currentOrd];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,9 +54,8 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> comparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return comp;
|
return comp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
* random order.
|
* random order.
|
||||||
*/
|
*/
|
||||||
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
|
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
|
||||||
|
// TODO keep this for now
|
||||||
private final int[] ords;
|
private final int[] ords;
|
||||||
private int currentOrd = -1;
|
private int currentOrd = -1;
|
||||||
private final BytesRef spare = new BytesRef();
|
private final BytesRef spare = new BytesRef();
|
||||||
|
@ -48,7 +48,7 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float freq() {
|
public long weight() {
|
||||||
return freqs[currentOrd];
|
return freqs[currentOrd];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -199,7 +199,7 @@ public class FSTCompletion {
|
||||||
* @return Returns the suggestions, sorted by their approximated weight first
|
* @return Returns the suggestions, sorted by their approximated weight first
|
||||||
* (decreasing) and then alphabetically (UTF-8 codepoint order).
|
* (decreasing) and then alphabetically (UTF-8 codepoint order).
|
||||||
*/
|
*/
|
||||||
public List<Completion> lookup(String key, int num) {
|
public List<Completion> lookup(CharSequence key, int num) {
|
||||||
if (key.length() == 0 || automaton == null) {
|
if (key.length() == 0 || automaton == null) {
|
||||||
return EMPTY_RESULT;
|
return EMPTY_RESULT;
|
||||||
}
|
}
|
||||||
|
@ -388,7 +388,7 @@ public class FSTCompletion {
|
||||||
* Returns the bucket assigned to a given key (if found) or <code>null</code> if
|
* Returns the bucket assigned to a given key (if found) or <code>null</code> if
|
||||||
* no exact match exists.
|
* no exact match exists.
|
||||||
*/
|
*/
|
||||||
public Integer getBucket(String key) {
|
public Integer getBucket(CharSequence key) {
|
||||||
return getExactMatchStartingFromRootArc(0, new BytesRef(key));
|
return getExactMatchStartingFromRootArc(0, new BytesRef(key));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ import org.apache.lucene.util.fst.*;
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* At runtime, in {@link FSTCompletion#lookup(String, int)},
|
* At runtime, in {@link FSTCompletion#lookup(CharSequence, int)},
|
||||||
* the automaton is utilized as follows:
|
* the automaton is utilized as follows:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>For each possible term weight encoded in the automaton (cached arcs from
|
* <li>For each possible term weight encoded in the automaton (cached arcs from
|
||||||
|
|
|
@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.NoOutputs;
|
||||||
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
|
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
|
||||||
*
|
*
|
||||||
* <p>This adapter differs from {@link FSTCompletion} in that it attempts
|
* <p>This adapter differs from {@link FSTCompletion} in that it attempts
|
||||||
* to discretize any "weights" as passed from in {@link TermFreqIterator#freq()}
|
* to discretize any "weights" as passed from in {@link TermFreqIterator#weight()}
|
||||||
* to match the number of buckets. For the rationale for bucketing, see
|
* to match the number of buckets. For the rationale for bucketing, see
|
||||||
* {@link FSTCompletion}.
|
* {@link FSTCompletion}.
|
||||||
*
|
*
|
||||||
|
@ -171,7 +171,7 @@ public class FSTCompletionLookup extends Lookup {
|
||||||
}
|
}
|
||||||
|
|
||||||
output.reset(buffer);
|
output.reset(buffer);
|
||||||
output.writeInt(FloatMagic.toSortable(tfit.freq()));
|
output.writeInt(FloatMagic.toSortable(tfit.weight()));
|
||||||
output.writeBytes(spare.bytes, spare.offset, spare.length);
|
output.writeBytes(spare.bytes, spare.offset, spare.length);
|
||||||
writer.write(buffer, 0, output.getPosition());
|
writer.write(buffer, 0, output.getPosition());
|
||||||
}
|
}
|
||||||
|
@ -232,7 +232,7 @@ public class FSTCompletionLookup extends Lookup {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<LookupResult> lookup(String key, boolean higherWeightsFirst, int num) {
|
public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
|
||||||
final List<Completion> completions;
|
final List<Completion> completions;
|
||||||
if (higherWeightsFirst) {
|
if (higherWeightsFirst) {
|
||||||
completions = higherWeightsCompletion.lookup(key, num);
|
completions = higherWeightsCompletion.lookup(key, num);
|
||||||
|
@ -241,20 +241,23 @@ public class FSTCompletionLookup extends Lookup {
|
||||||
}
|
}
|
||||||
|
|
||||||
final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size());
|
final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size());
|
||||||
|
CharsRef spare = new CharsRef();
|
||||||
for (Completion c : completions) {
|
for (Completion c : completions) {
|
||||||
results.add(new LookupResult(c.utf8.utf8ToString(), c.bucket));
|
spare.grow(c.utf8.length);
|
||||||
|
UnicodeUtil.UTF8toUTF16(c.utf8, spare);
|
||||||
|
results.add(new LookupResult(spare.toString(), c.bucket));
|
||||||
}
|
}
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean add(String key, Object value) {
|
public boolean add(CharSequence key, Object value) {
|
||||||
// Not supported.
|
// Not supported.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Float get(String key) {
|
public Object get(CharSequence key) {
|
||||||
Integer bucket = normalCompletion.getBucket(key);
|
Integer bucket = normalCompletion.getBucket(key);
|
||||||
if (bucket == null)
|
if (bucket == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -33,8 +33,10 @@ import org.apache.lucene.store.InputStreamDataInput;
|
||||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.lucene.util.fst.Builder;
|
import org.apache.lucene.util.fst.Builder;
|
||||||
import org.apache.lucene.util.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.fst.FST.Arc;
|
import org.apache.lucene.util.fst.FST.Arc;
|
||||||
|
@ -121,7 +123,7 @@ public class WFSTCompletionLookup extends Lookup {
|
||||||
output.reset(buffer);
|
output.reset(buffer);
|
||||||
output.writeBytes(spare.bytes, spare.offset, spare.length);
|
output.writeBytes(spare.bytes, spare.offset, spare.length);
|
||||||
output.writeByte((byte)0); // separator: not used, just for sort order
|
output.writeByte((byte)0); // separator: not used, just for sort order
|
||||||
output.writeInt((int)encodeWeight(iterator.freq()));
|
output.writeInt((int)encodeWeight(iterator.weight()));
|
||||||
writer.write(buffer, 0, output.getPosition());
|
writer.write(buffer, 0, output.getPosition());
|
||||||
}
|
}
|
||||||
writer.close();
|
writer.close();
|
||||||
|
@ -200,7 +202,7 @@ public class WFSTCompletionLookup extends Lookup {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
|
public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
|
||||||
assert num > 0;
|
assert num > 0;
|
||||||
BytesRef scratch = new BytesRef(key);
|
BytesRef scratch = new BytesRef(key);
|
||||||
int prefixLength = scratch.length;
|
int prefixLength = scratch.length;
|
||||||
|
@ -217,8 +219,11 @@ public class WFSTCompletionLookup extends Lookup {
|
||||||
}
|
}
|
||||||
|
|
||||||
List<LookupResult> results = new ArrayList<LookupResult>(num);
|
List<LookupResult> results = new ArrayList<LookupResult>(num);
|
||||||
|
CharsRef spare = new CharsRef();
|
||||||
if (exactFirst && arc.isFinal()) {
|
if (exactFirst && arc.isFinal()) {
|
||||||
results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
|
spare.grow(scratch.length);
|
||||||
|
UnicodeUtil.UTF8toUTF16(scratch, spare);
|
||||||
|
results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
|
||||||
if (--num == 0) {
|
if (--num == 0) {
|
||||||
return results; // that was quick
|
return results; // that was quick
|
||||||
}
|
}
|
||||||
|
@ -236,8 +241,9 @@ public class WFSTCompletionLookup extends Lookup {
|
||||||
// append suffix
|
// append suffix
|
||||||
Util.toBytesRef(completion.input, suffix);
|
Util.toBytesRef(completion.input, suffix);
|
||||||
scratch.append(suffix);
|
scratch.append(suffix);
|
||||||
|
spare.grow(scratch.length);
|
||||||
results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + completion.output)));
|
UnicodeUtil.UTF8toUTF16(scratch, spare);
|
||||||
|
results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + completion.output)));
|
||||||
}
|
}
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
@ -264,7 +270,7 @@ public class WFSTCompletionLookup extends Lookup {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean add(String key, Object value) {
|
public boolean add(CharSequence key, Object value) {
|
||||||
return false; // Not supported.
|
return false; // Not supported.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -273,7 +279,7 @@ public class WFSTCompletionLookup extends Lookup {
|
||||||
* or null if it does not exist.
|
* or null if it does not exist.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Float get(String key) {
|
public Object get(CharSequence key) {
|
||||||
Arc<Long> arc = new Arc<Long>();
|
Arc<Long> arc = new Arc<Long>();
|
||||||
Long result = null;
|
Long result = null;
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -28,7 +28,6 @@ import java.io.OutputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.search.spell.SortedIterator;
|
|
||||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||||
import org.apache.lucene.search.suggest.Lookup;
|
import org.apache.lucene.search.suggest.Lookup;
|
||||||
import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
|
import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
|
||||||
|
@ -45,7 +44,7 @@ public class JaspellLookup extends Lookup {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void build(TermFreqIterator tfit) throws IOException {
|
public void build(TermFreqIterator tfit) throws IOException {
|
||||||
if (tfit instanceof SortedIterator) {
|
if (tfit.getComparator() != null) {
|
||||||
// make sure it's unsorted
|
// make sure it's unsorted
|
||||||
// WTF - this could result in yet another sorted iteration....
|
// WTF - this could result in yet another sorted iteration....
|
||||||
tfit = new UnsortedTermFreqIteratorWrapper(tfit);
|
tfit = new UnsortedTermFreqIteratorWrapper(tfit);
|
||||||
|
@ -56,7 +55,7 @@ public class JaspellLookup extends Lookup {
|
||||||
final CharsRef charsSpare = new CharsRef();
|
final CharsRef charsSpare = new CharsRef();
|
||||||
|
|
||||||
while ((spare = tfit.next()) != null) {
|
while ((spare = tfit.next()) != null) {
|
||||||
float freq = tfit.freq();
|
float freq = tfit.weight();
|
||||||
if (spare.length == 0) {
|
if (spare.length == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -67,19 +66,19 @@ public class JaspellLookup extends Lookup {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean add(String key, Object value) {
|
public boolean add(CharSequence key, Object value) {
|
||||||
trie.put(key, value);
|
trie.put(key, value);
|
||||||
// XXX
|
// XXX
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object get(String key) {
|
public Object get(CharSequence key) {
|
||||||
return trie.get(key);
|
return trie.get(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
|
public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
|
||||||
List<LookupResult> res = new ArrayList<LookupResult>();
|
List<LookupResult> res = new ArrayList<LookupResult>();
|
||||||
List<String> list;
|
List<String> list;
|
||||||
int count = onlyMorePopular ? num * 2 : num;
|
int count = onlyMorePopular ? num * 2 : num;
|
||||||
|
@ -97,7 +96,7 @@ public class JaspellLookup extends Lookup {
|
||||||
LookupPriorityQueue queue = new LookupPriorityQueue(num);
|
LookupPriorityQueue queue = new LookupPriorityQueue(num);
|
||||||
for (String s : list) {
|
for (String s : list) {
|
||||||
float freq = (Float)trie.get(s);
|
float freq = (Float)trie.get(s);
|
||||||
queue.insertWithOverflow(new LookupResult(s, freq));
|
queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
|
||||||
}
|
}
|
||||||
for (LookupResult lr : queue.getResults()) {
|
for (LookupResult lr : queue.getResults()) {
|
||||||
res.add(lr);
|
res.add(lr);
|
||||||
|
@ -106,7 +105,7 @@ public class JaspellLookup extends Lookup {
|
||||||
for (int i = 0; i < maxCnt; i++) {
|
for (int i = 0; i < maxCnt; i++) {
|
||||||
String s = list.get(i);
|
String s = list.get(i);
|
||||||
float freq = (Float)trie.get(s);
|
float freq = (Float)trie.get(s);
|
||||||
res.add(new LookupResult(s, freq));
|
res.add(new LookupResult(new CharsRef(s), freq));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
|
|
@ -368,8 +368,8 @@ public class JaspellTernarySearchTrie {
|
||||||
* A <code>String</code> index.
|
* A <code>String</code> index.
|
||||||
*@return The object retrieved from the Ternary Search Trie.
|
*@return The object retrieved from the Ternary Search Trie.
|
||||||
*/
|
*/
|
||||||
public Object get(String key) {
|
public Object get(CharSequence key) {
|
||||||
TSTNode node = getNode(key.trim().toLowerCase());
|
TSTNode node = getNode(key);
|
||||||
if (node == null) {
|
if (node == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -435,7 +435,7 @@ public class JaspellTernarySearchTrie {
|
||||||
*@return The node object indexed by key. This object is an instance of an
|
*@return The node object indexed by key. This object is an instance of an
|
||||||
* inner class named <code>TernarySearchTrie.TSTNode</code>.
|
* inner class named <code>TernarySearchTrie.TSTNode</code>.
|
||||||
*/
|
*/
|
||||||
public TSTNode getNode(String key) {
|
public TSTNode getNode(CharSequence key) {
|
||||||
return getNode(key, rootNode);
|
return getNode(key, rootNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -443,15 +443,14 @@ public class JaspellTernarySearchTrie {
|
||||||
* Returns the node indexed by key, or <code>null</code> if that node doesn't
|
* Returns the node indexed by key, or <code>null</code> if that node doesn't
|
||||||
* exist. The search begins at root node.
|
* exist. The search begins at root node.
|
||||||
*
|
*
|
||||||
*@param key2
|
*@param key
|
||||||
* A <code>String</code> that indexes the node that is returned.
|
* A <code>String</code> that indexes the node that is returned.
|
||||||
*@param startNode
|
*@param startNode
|
||||||
* The top node defining the subtrie to be searched.
|
* The top node defining the subtrie to be searched.
|
||||||
*@return The node object indexed by key. This object is an instance of an
|
*@return The node object indexed by key. This object is an instance of an
|
||||||
* inner class named <code>TernarySearchTrie.TSTNode</code>.
|
* inner class named <code>TernarySearchTrie.TSTNode</code>.
|
||||||
*/
|
*/
|
||||||
protected TSTNode getNode(String key2, TSTNode startNode) {
|
protected TSTNode getNode(CharSequence key, TSTNode startNode) {
|
||||||
String key = key2.trim().toLowerCase();
|
|
||||||
if (key == null || startNode == null || key.length() == 0) {
|
if (key == null || startNode == null || key.length() == 0) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -490,7 +489,7 @@ public class JaspellTernarySearchTrie {
|
||||||
*@exception IllegalArgumentException
|
*@exception IllegalArgumentException
|
||||||
* If the key is an empty <code>String</code>.
|
* If the key is an empty <code>String</code>.
|
||||||
*/
|
*/
|
||||||
protected TSTNode getOrCreateNode(String key) throws NullPointerException,
|
protected TSTNode getOrCreateNode(CharSequence key) throws NullPointerException,
|
||||||
IllegalArgumentException {
|
IllegalArgumentException {
|
||||||
if (key == null) {
|
if (key == null) {
|
||||||
throw new NullPointerException(
|
throw new NullPointerException(
|
||||||
|
@ -568,7 +567,7 @@ public class JaspellTernarySearchTrie {
|
||||||
* The maximum number of values returned by this method.
|
* The maximum number of values returned by this method.
|
||||||
*@return A <code>List</code> with the results
|
*@return A <code>List</code> with the results
|
||||||
*/
|
*/
|
||||||
public List<String> matchAlmost(String key, int numReturnValues) {
|
public List<String> matchAlmost(CharSequence key, int numReturnValues) {
|
||||||
return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key,
|
return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key,
|
||||||
((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>(), false);
|
((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>(), false);
|
||||||
}
|
}
|
||||||
|
@ -598,7 +597,7 @@ public class JaspellTernarySearchTrie {
|
||||||
*@return A <code>List</code> with the results.
|
*@return A <code>List</code> with the results.
|
||||||
*/
|
*/
|
||||||
private List<String> matchAlmostRecursion(TSTNode currentNode, int charIndex,
|
private List<String> matchAlmostRecursion(TSTNode currentNode, int charIndex,
|
||||||
int d, String matchAlmostKey, int matchAlmostNumReturnValues,
|
int d, CharSequence matchAlmostKey, int matchAlmostNumReturnValues,
|
||||||
List<String> matchAlmostResult2, boolean upTo) {
|
List<String> matchAlmostResult2, boolean upTo) {
|
||||||
if ((currentNode == null)
|
if ((currentNode == null)
|
||||||
|| (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues)
|
|| (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues)
|
||||||
|
@ -658,7 +657,7 @@ public class JaspellTernarySearchTrie {
|
||||||
* The maximum number of values returned from this method.
|
* The maximum number of values returned from this method.
|
||||||
*@return A <code>List</code> with the results
|
*@return A <code>List</code> with the results
|
||||||
*/
|
*/
|
||||||
public List<String> matchPrefix(String prefix, int numReturnValues) {
|
public List<String> matchPrefix(CharSequence prefix, int numReturnValues) {
|
||||||
Vector<String> sortKeysResult = new Vector<String>();
|
Vector<String> sortKeysResult = new Vector<String>();
|
||||||
TSTNode startNode = getNode(prefix);
|
TSTNode startNode = getNode(prefix);
|
||||||
if (startNode == null) {
|
if (startNode == null) {
|
||||||
|
@ -722,8 +721,8 @@ public class JaspellTernarySearchTrie {
|
||||||
*@param value
|
*@param value
|
||||||
* The object to be stored in the Trie.
|
* The object to be stored in the Trie.
|
||||||
*/
|
*/
|
||||||
public void put(String key, Object value) {
|
public void put(CharSequence key, Object value) {
|
||||||
getOrCreateNode(key.trim().toLowerCase()).data = value;
|
getOrCreateNode(key).data = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -57,7 +57,7 @@ public class TSTAutocomplete {
|
||||||
* index of character in key to be inserted currently.
|
* index of character in key to be inserted currently.
|
||||||
* @return currentNode The new reference to root node of TST
|
* @return currentNode The new reference to root node of TST
|
||||||
*/
|
*/
|
||||||
public TernaryTreeNode insert(TernaryTreeNode currentNode, String s,
|
public TernaryTreeNode insert(TernaryTreeNode currentNode, CharSequence s,
|
||||||
Object val, int x) {
|
Object val, int x) {
|
||||||
if (s == null || s.length() <= x) {
|
if (s == null || s.length() <= x) {
|
||||||
return currentNode;
|
return currentNode;
|
||||||
|
@ -69,7 +69,7 @@ public class TSTAutocomplete {
|
||||||
if (x < s.length() - 1) {
|
if (x < s.length() - 1) {
|
||||||
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
|
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
|
||||||
} else {
|
} else {
|
||||||
currentNode.token = s;
|
currentNode.token = s.toString();
|
||||||
currentNode.val = val;
|
currentNode.val = val;
|
||||||
return currentNode;
|
return currentNode;
|
||||||
}
|
}
|
||||||
|
@ -79,7 +79,7 @@ public class TSTAutocomplete {
|
||||||
if (x < s.length() - 1) {
|
if (x < s.length() - 1) {
|
||||||
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
|
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
|
||||||
} else {
|
} else {
|
||||||
currentNode.token = s;
|
currentNode.token = s.toString();
|
||||||
currentNode.val = val;
|
currentNode.val = val;
|
||||||
return currentNode;
|
return currentNode;
|
||||||
}
|
}
|
||||||
|
@ -104,7 +104,7 @@ public class TSTAutocomplete {
|
||||||
* @return suggest list of auto-completed keys for the given prefix query.
|
* @return suggest list of auto-completed keys for the given prefix query.
|
||||||
*/
|
*/
|
||||||
public ArrayList<TernaryTreeNode> prefixCompletion(TernaryTreeNode root,
|
public ArrayList<TernaryTreeNode> prefixCompletion(TernaryTreeNode root,
|
||||||
String s, int x) {
|
CharSequence s, int x) {
|
||||||
|
|
||||||
TernaryTreeNode p = root;
|
TernaryTreeNode p = root;
|
||||||
ArrayList<TernaryTreeNode> suggest = new ArrayList<TernaryTreeNode>();
|
ArrayList<TernaryTreeNode> suggest = new ArrayList<TernaryTreeNode>();
|
||||||
|
|
|
@ -30,7 +30,6 @@ import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.search.suggest.Lookup;
|
import org.apache.lucene.search.suggest.Lookup;
|
||||||
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
|
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
|
||||||
import org.apache.lucene.search.spell.SortedIterator;
|
|
||||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CharsRef;
|
import org.apache.lucene.util.CharsRef;
|
||||||
|
@ -45,7 +44,7 @@ public class TSTLookup extends Lookup {
|
||||||
public void build(TermFreqIterator tfit) throws IOException {
|
public void build(TermFreqIterator tfit) throws IOException {
|
||||||
root = new TernaryTreeNode();
|
root = new TernaryTreeNode();
|
||||||
// buffer first
|
// buffer first
|
||||||
if ((!(tfit instanceof SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
|
if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
|
||||||
// make sure it's sorted and the comparator uses UTF16 sort order
|
// make sure it's sorted and the comparator uses UTF16 sort order
|
||||||
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
|
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
|
||||||
}
|
}
|
||||||
|
@ -58,34 +57,47 @@ public class TSTLookup extends Lookup {
|
||||||
charsSpare.grow(spare.length);
|
charsSpare.grow(spare.length);
|
||||||
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
|
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
|
||||||
tokens.add(charsSpare.toString());
|
tokens.add(charsSpare.toString());
|
||||||
vals.add(new Float(tfit.freq()));
|
vals.add(new Float(tfit.weight()));
|
||||||
}
|
}
|
||||||
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
|
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean add(String key, Object value) {
|
public boolean add(CharSequence key, Object value) {
|
||||||
autocomplete.insert(root, key, value, 0);
|
autocomplete.insert(root, key, value, 0);
|
||||||
// XXX we don't know if a new node was created
|
// XXX we don't know if a new node was created
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object get(String key) {
|
public Object get(CharSequence key) {
|
||||||
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
|
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
|
||||||
if (list == null || list.isEmpty()) {
|
if (list == null || list.isEmpty()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
for (TernaryTreeNode n : list) {
|
for (TernaryTreeNode n : list) {
|
||||||
if (n.token.equals(key)) {
|
if (charSeqEquals(n.token, key)) {
|
||||||
return n.val;
|
return n.val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean charSeqEquals(CharSequence left, CharSequence right) {
|
||||||
|
int len = left.length();
|
||||||
|
if (len != right.length()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
if (left.charAt(i) != right.charAt(i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
|
public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
|
||||||
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
|
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
|
||||||
List<LookupResult> res = new ArrayList<LookupResult>();
|
List<LookupResult> res = new ArrayList<LookupResult>();
|
||||||
if (list == null || list.size() == 0) {
|
if (list == null || list.size() == 0) {
|
||||||
|
|
|
@ -97,7 +97,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
||||||
while ((line = br.readLine()) != null) {
|
while ((line = br.readLine()) != null) {
|
||||||
int tab = line.indexOf('|');
|
int tab = line.indexOf('|');
|
||||||
assertTrue("No | separator?: " + line, tab >= 0);
|
assertTrue("No | separator?: " + line, tab >= 0);
|
||||||
float weight = Float.parseFloat(line.substring(tab + 1));
|
int weight = Integer.parseInt(line.substring(tab + 1));
|
||||||
String key = line.substring(0, tab);
|
String key = line.substring(0, tab);
|
||||||
input.add(new TermFreq(key, weight));
|
input.add(new TermFreq(key, weight));
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
|
||||||
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
|
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
|
||||||
import org.apache.lucene.search.suggest.tst.TSTLookup;
|
import org.apache.lucene.search.suggest.tst.TSTLookup;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
public class PersistenceTest extends LuceneTestCase {
|
public class PersistenceTest extends LuceneTestCase {
|
||||||
public final String[] keys = new String[] {
|
public final String[] keys = new String[] {
|
||||||
|
@ -61,7 +62,7 @@ public class PersistenceTest extends LuceneTestCase {
|
||||||
Lookup lookup = lookupClass.newInstance();
|
Lookup lookup = lookupClass.newInstance();
|
||||||
TermFreq[] keys = new TermFreq[this.keys.length];
|
TermFreq[] keys = new TermFreq[this.keys.length];
|
||||||
for (int i = 0; i < keys.length; i++)
|
for (int i = 0; i < keys.length; i++)
|
||||||
keys[i] = new TermFreq(this.keys[i], (float) i);
|
keys[i] = new TermFreq(this.keys[i], i);
|
||||||
lookup.build(new TermFreqArrayIterator(keys));
|
lookup.build(new TermFreqArrayIterator(keys));
|
||||||
|
|
||||||
// Store the suggester.
|
// Store the suggester.
|
||||||
|
@ -75,7 +76,7 @@ public class PersistenceTest extends LuceneTestCase {
|
||||||
// Assert validity.
|
// Assert validity.
|
||||||
float previous = Float.NEGATIVE_INFINITY;
|
float previous = Float.NEGATIVE_INFINITY;
|
||||||
for (TermFreq k : keys) {
|
for (TermFreq k : keys) {
|
||||||
Float val = (Float) lookup.get(k.term.utf8ToString());
|
Float val = (Float) lookup.get(_TestUtil.bytesToCharSequence(k.term, random));
|
||||||
assertNotNull(k.term.utf8ToString(), val);
|
assertNotNull(k.term.utf8ToString(), val);
|
||||||
|
|
||||||
if (supportsExactWeights) {
|
if (supportsExactWeights) {
|
||||||
|
|
|
@ -21,13 +21,13 @@ import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
public final class TermFreq {
|
public final class TermFreq {
|
||||||
public final BytesRef term;
|
public final BytesRef term;
|
||||||
public final float v;
|
public final long v;
|
||||||
|
|
||||||
public TermFreq(String term, float v) {
|
public TermFreq(String term, long v) {
|
||||||
this(new BytesRef(term), v);
|
this(new BytesRef(term), v);
|
||||||
}
|
}
|
||||||
|
|
||||||
public TermFreq(BytesRef term, float v) {
|
public TermFreq(BytesRef term, long v) {
|
||||||
this.term = term;
|
this.term = term;
|
||||||
this.v = v;
|
this.v = v;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||||
|
@ -44,7 +45,7 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
|
||||||
this(i.iterator());
|
this(i.iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
public float freq() {
|
public long weight() {
|
||||||
return current.v;
|
return current.v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,4 +58,9 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -21,6 +21,8 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.suggest.BytesRefList;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefIterator;
|
import org.apache.lucene.util.BytesRefIterator;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.search.spell.Dictionary;
|
import org.apache.lucene.search.spell.Dictionary;
|
||||||
import org.apache.lucene.search.spell.HighFrequencyDictionary;
|
import org.apache.lucene.search.spell.HighFrequencyDictionary;
|
||||||
import org.apache.lucene.search.spell.SortedIterator;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.BytesRefIterator;
|
import org.apache.lucene.util.BytesRefIterator;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -37,8 +36,7 @@ public class TestHighFrequencyDictionary extends LuceneTestCase {
|
||||||
IndexReader ir = DirectoryReader.open(dir);
|
IndexReader ir = DirectoryReader.open(dir);
|
||||||
Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
|
Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
|
||||||
BytesRefIterator tf = dictionary.getWordsIterator();
|
BytesRefIterator tf = dictionary.getWordsIterator();
|
||||||
assertTrue(tf instanceof SortedIterator);
|
assertNull(tf.getComparator());
|
||||||
((SortedIterator)tf).comparator();
|
|
||||||
assertNull(tf.next());
|
assertNull(tf.next());
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ public class TestTermFreqIterator extends LuceneTestCase {
|
||||||
public void testTerms() throws Exception {
|
public void testTerms() throws Exception {
|
||||||
int num = atLeast(10000);
|
int num = atLeast(10000);
|
||||||
|
|
||||||
TreeMap<BytesRef,Float> sorted = new TreeMap<BytesRef,Float>();
|
TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>();
|
||||||
TermFreq[] unsorted = new TermFreq[num];
|
TermFreq[] unsorted = new TermFreq[num];
|
||||||
|
|
||||||
for (int i = 0; i < num; i++) {
|
for (int i = 0; i < num; i++) {
|
||||||
|
@ -46,28 +46,28 @@ public class TestTermFreqIterator extends LuceneTestCase {
|
||||||
do {
|
do {
|
||||||
key = new BytesRef(_TestUtil.randomUnicodeString(random));
|
key = new BytesRef(_TestUtil.randomUnicodeString(random));
|
||||||
} while (sorted.containsKey(key));
|
} while (sorted.containsKey(key));
|
||||||
float value = random.nextFloat();
|
long value = random.nextLong();
|
||||||
sorted.put(key, value);
|
sorted.put(key, value);
|
||||||
unsorted[i] = new TermFreq(key, value);
|
unsorted[i] = new TermFreq(key, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
// test the sorted iterator wrapper
|
// test the sorted iterator wrapper
|
||||||
TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator());
|
TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||||
Iterator<Map.Entry<BytesRef,Float>> expected = sorted.entrySet().iterator();
|
Iterator<Map.Entry<BytesRef,Long>> expected = sorted.entrySet().iterator();
|
||||||
while (expected.hasNext()) {
|
while (expected.hasNext()) {
|
||||||
Map.Entry<BytesRef,Float> entry = expected.next();
|
Map.Entry<BytesRef,Long> entry = expected.next();
|
||||||
|
|
||||||
assertEquals(entry.getKey(), wrapper.next());
|
assertEquals(entry.getKey(), wrapper.next());
|
||||||
assertEquals(entry.getValue().floatValue(), wrapper.freq(), 0F);
|
assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F);
|
||||||
}
|
}
|
||||||
assertNull(wrapper.next());
|
assertNull(wrapper.next());
|
||||||
|
|
||||||
// test the unsorted iterator wrapper
|
// test the unsorted iterator wrapper
|
||||||
wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted));
|
wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted));
|
||||||
TreeMap<BytesRef,Float> actual = new TreeMap<BytesRef,Float>();
|
TreeMap<BytesRef,Long> actual = new TreeMap<BytesRef,Long>();
|
||||||
BytesRef key;
|
BytesRef key;
|
||||||
while ((key = wrapper.next()) != null) {
|
while ((key = wrapper.next()) != null) {
|
||||||
float value = wrapper.freq();
|
long value = wrapper.weight();
|
||||||
actual.put(BytesRef.deepCopyOf(key), value);
|
actual.put(BytesRef.deepCopyOf(key), value);
|
||||||
}
|
}
|
||||||
assertEquals(sorted, actual);
|
assertEquals(sorted, actual);
|
||||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.util.*;
|
||||||
* Unit tests for {@link FSTCompletion}.
|
* Unit tests for {@link FSTCompletion}.
|
||||||
*/
|
*/
|
||||||
public class FSTCompletionTest extends LuceneTestCase {
|
public class FSTCompletionTest extends LuceneTestCase {
|
||||||
public static TermFreq tf(String t, float v) {
|
public static TermFreq tf(String t, int v) {
|
||||||
return new TermFreq(t, v);
|
return new TermFreq(t, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,28 +62,28 @@ public class FSTCompletionTest extends LuceneTestCase {
|
||||||
tf("foundation", 1),
|
tf("foundation", 1),
|
||||||
tf("fourblah", 1),
|
tf("fourblah", 1),
|
||||||
tf("fourteen", 1),
|
tf("fourteen", 1),
|
||||||
tf("four", 0f),
|
tf("four", 0),
|
||||||
tf("fourier", 0f),
|
tf("fourier", 0),
|
||||||
tf("fourty", 0f),
|
tf("fourty", 0),
|
||||||
tf("xo", 1),
|
tf("xo", 1),
|
||||||
};
|
};
|
||||||
return keys;
|
return keys;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testExactMatchHighPriority() throws Exception {
|
public void testExactMatchHighPriority() throws Exception {
|
||||||
assertMatchEquals(completion.lookup("two", 1),
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("two", random), 1),
|
||||||
"two/1.0");
|
"two/1.0");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testExactMatchLowPriority() throws Exception {
|
public void testExactMatchLowPriority() throws Exception {
|
||||||
assertMatchEquals(completion.lookup("one", 2),
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
|
||||||
"one/0.0",
|
"one/0.0",
|
||||||
"oneness/1.0");
|
"oneness/1.0");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testExactMatchReordering() throws Exception {
|
public void testExactMatchReordering() throws Exception {
|
||||||
// Check reordering of exact matches.
|
// Check reordering of exact matches.
|
||||||
assertMatchEquals(completion.lookup("four", 4),
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4),
|
||||||
"four/0.0",
|
"four/0.0",
|
||||||
"fourblah/1.0",
|
"fourblah/1.0",
|
||||||
"fourteen/1.0",
|
"fourteen/1.0",
|
||||||
|
@ -92,49 +92,49 @@ public class FSTCompletionTest extends LuceneTestCase {
|
||||||
|
|
||||||
public void testRequestedCount() throws Exception {
|
public void testRequestedCount() throws Exception {
|
||||||
// 'one' is promoted after collecting two higher ranking results.
|
// 'one' is promoted after collecting two higher ranking results.
|
||||||
assertMatchEquals(completion.lookup("one", 2),
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
|
||||||
"one/0.0",
|
"one/0.0",
|
||||||
"oneness/1.0");
|
"oneness/1.0");
|
||||||
|
|
||||||
// 'four' is collected in a bucket and then again as an exact match.
|
// 'four' is collected in a bucket and then again as an exact match.
|
||||||
assertMatchEquals(completion.lookup("four", 2),
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 2),
|
||||||
"four/0.0",
|
"four/0.0",
|
||||||
"fourblah/1.0");
|
"fourblah/1.0");
|
||||||
|
|
||||||
// Check reordering of exact matches.
|
// Check reordering of exact matches.
|
||||||
assertMatchEquals(completion.lookup("four", 4),
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4),
|
||||||
"four/0.0",
|
"four/0.0",
|
||||||
"fourblah/1.0",
|
"fourblah/1.0",
|
||||||
"fourteen/1.0",
|
"fourteen/1.0",
|
||||||
"fourier/0.0");
|
"fourier/0.0");
|
||||||
|
|
||||||
// 'one' is at the top after collecting all alphabetical results.
|
// 'one' is at the top after collecting all alphabetical results.
|
||||||
assertMatchEquals(completionAlphabetical.lookup("one", 2),
|
assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2),
|
||||||
"one/0.0",
|
"one/0.0",
|
||||||
"oneness/1.0");
|
"oneness/1.0");
|
||||||
|
|
||||||
// 'one' is not promoted after collecting two higher ranking results.
|
// 'one' is not promoted after collecting two higher ranking results.
|
||||||
FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false);
|
FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false);
|
||||||
assertMatchEquals(noPromotion.lookup("one", 2),
|
assertMatchEquals(noPromotion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
|
||||||
"oneness/1.0",
|
"oneness/1.0",
|
||||||
"onerous/1.0");
|
"onerous/1.0");
|
||||||
|
|
||||||
// 'one' is at the top after collecting all alphabetical results.
|
// 'one' is at the top after collecting all alphabetical results.
|
||||||
assertMatchEquals(completionAlphabetical.lookup("one", 2),
|
assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2),
|
||||||
"one/0.0",
|
"one/0.0",
|
||||||
"oneness/1.0");
|
"oneness/1.0");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMiss() throws Exception {
|
public void testMiss() throws Exception {
|
||||||
assertMatchEquals(completion.lookup("xyz", 1));
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("xyz", random), 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAlphabeticWithWeights() throws Exception {
|
public void testAlphabeticWithWeights() throws Exception {
|
||||||
assertEquals(0, completionAlphabetical.lookup("xyz", 1).size());
|
assertEquals(0, completionAlphabetical.lookup(_TestUtil.stringToCharSequence("xyz", random), 1).size());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFullMatchList() throws Exception {
|
public void testFullMatchList() throws Exception {
|
||||||
assertMatchEquals(completion.lookup("one", Integer.MAX_VALUE),
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), Integer.MAX_VALUE),
|
||||||
"oneness/1.0",
|
"oneness/1.0",
|
||||||
"onerous/1.0",
|
"onerous/1.0",
|
||||||
"onesimus/1.0",
|
"onesimus/1.0",
|
||||||
|
@ -148,7 +148,7 @@ public class FSTCompletionTest extends LuceneTestCase {
|
||||||
builder.add(new BytesRef(key), 0);
|
builder.add(new BytesRef(key), 0);
|
||||||
|
|
||||||
FSTCompletion lookup = builder.build();
|
FSTCompletion lookup = builder.build();
|
||||||
List<Completion> result = lookup.lookup(key, 1);
|
List<Completion> result = lookup.lookup(_TestUtil.stringToCharSequence(key, random), 1);
|
||||||
assertEquals(1, result.size());
|
assertEquals(1, result.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ public class FSTCompletionTest extends LuceneTestCase {
|
||||||
Random r = random;
|
Random r = random;
|
||||||
List<TermFreq> keys = new ArrayList<TermFreq>();
|
List<TermFreq> keys = new ArrayList<TermFreq>();
|
||||||
for (int i = 0; i < 5000; i++) {
|
for (int i = 0; i < 5000; i++) {
|
||||||
keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1.0f));
|
keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
|
||||||
}
|
}
|
||||||
|
|
||||||
lookup.build(new TermFreqArrayIterator(keys));
|
lookup.build(new TermFreqArrayIterator(keys));
|
||||||
|
@ -167,7 +167,7 @@ public class FSTCompletionTest extends LuceneTestCase {
|
||||||
// are.
|
// are.
|
||||||
Float previous = null;
|
Float previous = null;
|
||||||
for (TermFreq tf : keys) {
|
for (TermFreq tf : keys) {
|
||||||
Float current = lookup.get(tf.term.utf8ToString());
|
Float current = (Float)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random));
|
||||||
if (previous != null) {
|
if (previous != null) {
|
||||||
assertEquals(previous, current);
|
assertEquals(previous, current);
|
||||||
}
|
}
|
||||||
|
@ -180,28 +180,27 @@ public class FSTCompletionTest extends LuceneTestCase {
|
||||||
|
|
||||||
FSTCompletionLookup lookup = new FSTCompletionLookup();
|
FSTCompletionLookup lookup = new FSTCompletionLookup();
|
||||||
lookup.build(new TermFreqArrayIterator(input));
|
lookup.build(new TermFreqArrayIterator(input));
|
||||||
|
|
||||||
for (TermFreq tf : input) {
|
for (TermFreq tf : input) {
|
||||||
assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null);
|
assertTrue("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)) != null);
|
||||||
assertEquals(tf.term.utf8ToString(), lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key);
|
assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random), true, 1).get(0).key.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
List<LookupResult> result = lookup.lookup("wit", true, 5);
|
List<LookupResult> result = lookup.lookup(_TestUtil.stringToCharSequence("wit", random), true, 5);
|
||||||
assertEquals(5, result.size());
|
assertEquals(5, result.size());
|
||||||
assertTrue(result.get(0).key.equals("wit")); // exact match.
|
assertTrue(result.get(0).key.toString().equals("wit")); // exact match.
|
||||||
assertTrue(result.get(1).key.equals("with")); // highest count.
|
assertTrue(result.get(1).key.toString().equals("with")); // highest count.
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testEmptyInput() throws Exception {
|
public void testEmptyInput() throws Exception {
|
||||||
completion = new FSTCompletionBuilder().build();
|
completion = new FSTCompletionBuilder().build();
|
||||||
assertMatchEquals(completion.lookup("", 10));
|
assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("", random), 10));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandom() throws Exception {
|
public void testRandom() throws Exception {
|
||||||
List<TermFreq> freqs = new ArrayList<TermFreq>();
|
List<TermFreq> freqs = new ArrayList<TermFreq>();
|
||||||
Random rnd = random;
|
Random rnd = random;
|
||||||
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
|
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
|
||||||
float weight = rnd.nextFloat() * 100;
|
int weight = random.nextInt(100);
|
||||||
freqs.add(new TermFreq("" + rnd.nextLong(), weight));
|
freqs.add(new TermFreq("" + rnd.nextLong(), weight));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -212,8 +211,8 @@ public class FSTCompletionTest extends LuceneTestCase {
|
||||||
final String term = tf.term.utf8ToString();
|
final String term = tf.term.utf8ToString();
|
||||||
for (int i = 1; i < term.length(); i++) {
|
for (int i = 1; i < term.length(); i++) {
|
||||||
String prefix = term.substring(0, i);
|
String prefix = term.substring(0, i);
|
||||||
for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
|
for (LookupResult lr : lookup.lookup(_TestUtil.stringToCharSequence(prefix, random), true, 10)) {
|
||||||
assertTrue(lr.key.startsWith(prefix));
|
assertTrue(lr.key.toString().startsWith(prefix));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,33 +45,33 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
||||||
suggester.build(new TermFreqArrayIterator(keys));
|
suggester.build(new TermFreqArrayIterator(keys));
|
||||||
|
|
||||||
// top N of 2, but only foo is available
|
// top N of 2, but only foo is available
|
||||||
List<LookupResult> results = suggester.lookup("f", false, 2);
|
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
|
||||||
assertEquals(1, results.size());
|
assertEquals(1, results.size());
|
||||||
assertEquals("foo", results.get(0).key);
|
assertEquals("foo", results.get(0).key.toString());
|
||||||
assertEquals(50, results.get(0).value, 0.01F);
|
assertEquals(50, results.get(0).value, 0.01F);
|
||||||
|
|
||||||
// top N of 1 for 'bar': we return this even though barbar is higher
|
// top N of 1 for 'bar': we return this even though barbar is higher
|
||||||
results = suggester.lookup("bar", false, 1);
|
results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random), false, 1);
|
||||||
assertEquals(1, results.size());
|
assertEquals(1, results.size());
|
||||||
assertEquals("bar", results.get(0).key);
|
assertEquals("bar", results.get(0).key.toString());
|
||||||
assertEquals(10, results.get(0).value, 0.01F);
|
assertEquals(10, results.get(0).value, 0.01F);
|
||||||
|
|
||||||
// top N Of 2 for 'b'
|
// top N Of 2 for 'b'
|
||||||
results = suggester.lookup("b", false, 2);
|
results = suggester.lookup(_TestUtil.stringToCharSequence("b", random), false, 2);
|
||||||
assertEquals(2, results.size());
|
assertEquals(2, results.size());
|
||||||
assertEquals("barbar", results.get(0).key);
|
assertEquals("barbar", results.get(0).key.toString());
|
||||||
assertEquals(12, results.get(0).value, 0.01F);
|
assertEquals(12, results.get(0).value, 0.01F);
|
||||||
assertEquals("bar", results.get(1).key);
|
assertEquals("bar", results.get(1).key.toString());
|
||||||
assertEquals(10, results.get(1).value, 0.01F);
|
assertEquals(10, results.get(1).value, 0.01F);
|
||||||
|
|
||||||
// top N of 3 for 'ba'
|
// top N of 3 for 'ba'
|
||||||
results = suggester.lookup("ba", false, 3);
|
results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random), false, 3);
|
||||||
assertEquals(3, results.size());
|
assertEquals(3, results.size());
|
||||||
assertEquals("barbar", results.get(0).key);
|
assertEquals("barbar", results.get(0).key.toString());
|
||||||
assertEquals(12, results.get(0).value, 0.01F);
|
assertEquals(12, results.get(0).value, 0.01F);
|
||||||
assertEquals("bar", results.get(1).key);
|
assertEquals("bar", results.get(1).key.toString());
|
||||||
assertEquals(10, results.get(1).value, 0.01F);
|
assertEquals(10, results.get(1).value, 0.01F);
|
||||||
assertEquals("barbara", results.get(2).key);
|
assertEquals("barbara", results.get(2).key.toString());
|
||||||
assertEquals(6, results.get(2).value, 0.01F);
|
assertEquals(6, results.get(2).value, 0.01F);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,7 +100,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
||||||
// we can probably do Integer.MAX_VALUE here, but why worry.
|
// we can probably do Integer.MAX_VALUE here, but why worry.
|
||||||
int weight = random.nextInt(1<<24);
|
int weight = random.nextInt(1<<24);
|
||||||
slowCompletor.put(s, (long)weight);
|
slowCompletor.put(s, (long)weight);
|
||||||
keys[i] = new TermFreq(s, (float) weight);
|
keys[i] = new TermFreq(s, weight);
|
||||||
}
|
}
|
||||||
|
|
||||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||||
|
@ -109,7 +109,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
||||||
for (String prefix : allPrefixes) {
|
for (String prefix : allPrefixes) {
|
||||||
|
|
||||||
final int topN = _TestUtil.nextInt(random, 1, 10);
|
final int topN = _TestUtil.nextInt(random, 1, 10);
|
||||||
List<LookupResult> r = suggester.lookup(prefix, false, topN);
|
List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random), false, topN);
|
||||||
|
|
||||||
// 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
|
// 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
|
||||||
final List<LookupResult> matches = new ArrayList<LookupResult>();
|
final List<LookupResult> matches = new ArrayList<LookupResult>();
|
||||||
|
@ -126,7 +126,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
||||||
public int compare(LookupResult left, LookupResult right) {
|
public int compare(LookupResult left, LookupResult right) {
|
||||||
int cmp = Float.compare(right.value, left.value);
|
int cmp = Float.compare(right.value, left.value);
|
||||||
if (cmp == 0) {
|
if (cmp == 0) {
|
||||||
return left.key.compareTo(right.key);
|
return left.compareTo(right);
|
||||||
} else {
|
} else {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
@ -140,7 +140,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
||||||
|
|
||||||
for(int hit=0;hit<r.size();hit++) {
|
for(int hit=0;hit<r.size();hit++) {
|
||||||
//System.out.println(" check hit " + hit);
|
//System.out.println(" check hit " + hit);
|
||||||
assertEquals(matches.get(hit).key, r.get(hit).key);
|
assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
|
||||||
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
|
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.search.spell.HighFrequencyDictionary;
|
||||||
import org.apache.lucene.search.suggest.FileDictionary;
|
import org.apache.lucene.search.suggest.FileDictionary;
|
||||||
import org.apache.lucene.search.suggest.Lookup;
|
import org.apache.lucene.search.suggest.Lookup;
|
||||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
|
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
|
@ -152,7 +153,7 @@ public class Suggester extends SolrSpellChecker {
|
||||||
build(core, searcher);
|
build(core, searcher);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void add(String query, int numHits) {
|
public void add(CharsRef query, int numHits) {
|
||||||
LOG.info("add " + query + ", " + numHits);
|
LOG.info("add " + query + ", " + numHits);
|
||||||
lookup.add(query, new Integer(numHits));
|
lookup.add(query, new Integer(numHits));
|
||||||
}
|
}
|
||||||
|
@ -167,9 +168,12 @@ public class Suggester extends SolrSpellChecker {
|
||||||
return EMPTY_RESULT;
|
return EMPTY_RESULT;
|
||||||
}
|
}
|
||||||
SpellingResult res = new SpellingResult();
|
SpellingResult res = new SpellingResult();
|
||||||
|
CharsRef scratch = new CharsRef();
|
||||||
for (Token t : options.tokens) {
|
for (Token t : options.tokens) {
|
||||||
String term = new String(t.buffer(), 0, t.length());
|
scratch.chars = t.buffer();
|
||||||
List<LookupResult> suggestions = lookup.lookup(term,
|
scratch.offset = 0;
|
||||||
|
scratch.length = t.length();
|
||||||
|
List<LookupResult> suggestions = lookup.lookup(scratch,
|
||||||
options.onlyMorePopular, options.count);
|
options.onlyMorePopular, options.count);
|
||||||
if (suggestions == null) {
|
if (suggestions == null) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -178,7 +182,7 @@ public class Suggester extends SolrSpellChecker {
|
||||||
Collections.sort(suggestions);
|
Collections.sort(suggestions);
|
||||||
}
|
}
|
||||||
for (LookupResult lr : suggestions) {
|
for (LookupResult lr : suggestions) {
|
||||||
res.add(t, lr.key, ((Number)lr.value).intValue());
|
res.add(t, lr.key.toString(), ((Number)lr.value).intValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
|
Loading…
Reference in New Issue