mirror of https://github.com/apache/lucene.git
LUCENE-3807: clean up TermFreqIterator API
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1291418 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
630addb415
commit
1860439f15
|
@ -23,6 +23,7 @@ import java.util.Comparator;
|
|||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
|
||||
/** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link
|
||||
* #seekExact(BytesRef,boolean)}) or step through ({@link
|
||||
|
@ -40,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
* of the <code>seek</code> methods.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public abstract class TermsEnum {
|
||||
public abstract class TermsEnum implements BytesRefIterator {
|
||||
|
||||
private AttributeSource atts = null;
|
||||
|
||||
|
@ -114,14 +115,6 @@ public abstract class TermsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
/** Increments the enumeration to the next term.
|
||||
* Returns the resulting term, or null if the end was
|
||||
* hit (which means the enum is unpositioned). The
|
||||
* returned BytesRef may be re-used across calls to next.
|
||||
* After this method returns null, do not call it again:
|
||||
* the results are undefined. */
|
||||
public abstract BytesRef next() throws IOException;
|
||||
|
||||
/** Returns current term. Do not call this when the enum
|
||||
* is unpositioned. */
|
||||
public abstract BytesRef term() throws IOException;
|
||||
|
|
|
@ -280,6 +280,37 @@ public final class ByteBlockPool {
|
|||
} while(true);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public final BytesRef copyFrom(final BytesRef bytes) {
|
||||
final int length = bytes.length;
|
||||
final int offset = bytes.offset;
|
||||
bytes.offset = 0;
|
||||
bytes.grow(length);
|
||||
int bufferIndex = offset >> BYTE_BLOCK_SHIFT;
|
||||
byte[] buffer = buffers[bufferIndex];
|
||||
int pos = offset & BYTE_BLOCK_MASK;
|
||||
int overflow = (pos + length) - BYTE_BLOCK_SIZE;
|
||||
do {
|
||||
if (overflow <= 0) {
|
||||
System.arraycopy(buffer, pos, bytes.bytes, bytes.offset, bytes.length);
|
||||
bytes.length = length;
|
||||
bytes.offset = 0;
|
||||
break;
|
||||
} else {
|
||||
final int bytesToCopy = length - overflow;
|
||||
System.arraycopy(buffer, pos, bytes.bytes, bytes.offset, bytesToCopy);
|
||||
pos = 0;
|
||||
bytes.length -= bytesToCopy;
|
||||
bytes.offset += bytesToCopy;
|
||||
buffer = buffers[bufferIndex];
|
||||
overflow = overflow - BYTE_BLOCK_SIZE;
|
||||
}
|
||||
} while (true);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the pools content to the given {@link DataOutput}
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A simple iterator interface for {@link BytesRef} iteration
|
||||
*
|
||||
*/
|
||||
public interface BytesRefIterator {
|
||||
|
||||
public static final BytesRefIterator EMPTY_ITERATOR = new EmptyBytesRefIterator();
|
||||
|
||||
/**
|
||||
* Increments the iteration to the next {@link BytesRef} in the iterator.
|
||||
* Returns the resulting {@link BytesRef} or <code>null</code> if the end of
|
||||
* the iterator is reached. The returned BytesRef may be re-used across calls
|
||||
* to next. After this method returns null, do not call it again: the results
|
||||
* are undefined.
|
||||
*
|
||||
* @return the next {@link BytesRef} in the iterator or <code>null</code> if
|
||||
* the end of the iterator is reached.
|
||||
* @throws IOException
|
||||
*/
|
||||
public BytesRef next() throws IOException;
|
||||
|
||||
public final static class EmptyBytesRefIterator implements BytesRefIterator {
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -16,7 +16,7 @@ package org.apache.lucene.search.spell;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Iterator;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
|
||||
/**
|
||||
* A simple interface representing a Dictionary. A Dictionary
|
||||
|
@ -30,5 +30,5 @@ public interface Dictionary {
|
|||
* Return all words present in the dictionary
|
||||
* @return an iterator over every word in the dictionary
|
||||
*/
|
||||
Iterator<String> getWordsIterator();
|
||||
BytesRefIterator getWordsIterator();
|
||||
}
|
||||
|
|
|
@ -18,12 +18,14 @@
|
|||
package org.apache.lucene.search.spell;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
@ -50,14 +52,13 @@ public class HighFrequencyDictionary implements Dictionary {
|
|||
this.thresh = thresh;
|
||||
}
|
||||
|
||||
public final Iterator<String> getWordsIterator() {
|
||||
public final BytesRefIterator getWordsIterator() {
|
||||
return new HighFrequencyIterator();
|
||||
}
|
||||
|
||||
final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
|
||||
private TermsEnum termsEnum;
|
||||
private BytesRef actualTerm;
|
||||
private boolean hasNextCalled;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
private final TermsEnum termsEnum;
|
||||
private int minNumDocs;
|
||||
|
||||
HighFrequencyIterator() {
|
||||
|
@ -65,6 +66,8 @@ public class HighFrequencyDictionary implements Dictionary {
|
|||
Terms terms = MultiFields.getTerms(reader, field);
|
||||
if (terms != null) {
|
||||
termsEnum = terms.iterator(null);
|
||||
} else {
|
||||
termsEnum = null;
|
||||
}
|
||||
minNumDocs = (int)(thresh * (float)reader.numDocs());
|
||||
} catch (IOException e) {
|
||||
|
@ -83,57 +86,27 @@ public class HighFrequencyDictionary implements Dictionary {
|
|||
throw new RuntimeException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
public String next() {
|
||||
if (!hasNextCalled && !hasNext()) {
|
||||
return null;
|
||||
}
|
||||
hasNextCalled = false;
|
||||
|
||||
if (actualTerm == null) {
|
||||
return null;
|
||||
} else {
|
||||
UnicodeUtil.UTF8toUTF16(actualTerm, spare);
|
||||
return spare.toString();
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (termsEnum != null) {
|
||||
BytesRef next = termsEnum.next();
|
||||
if (next != null && isFrequent(termsEnum.docFreq())) {
|
||||
spare.copyBytes(next);
|
||||
return spare;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
if (hasNextCalled) {
|
||||
return actualTerm != null;
|
||||
@Override
|
||||
public Comparator<BytesRef> comparator() {
|
||||
try {
|
||||
return termsEnum.getComparator();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
hasNextCalled = true;
|
||||
|
||||
if (termsEnum == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
while(true) {
|
||||
|
||||
try {
|
||||
actualTerm = termsEnum.next();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
// if there are no words return false
|
||||
if (actualTerm == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// got a valid term, does it pass the threshold?
|
||||
try {
|
||||
if (isFrequent(termsEnum.docFreq())) {
|
||||
return true;
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,13 +18,7 @@ package org.apache.lucene.search.spell;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
|
||||
|
@ -49,50 +43,18 @@ public class LuceneDictionary implements Dictionary {
|
|||
this.field = field;
|
||||
}
|
||||
|
||||
public final Iterator<String> getWordsIterator() {
|
||||
return new LuceneIterator();
|
||||
}
|
||||
|
||||
|
||||
final class LuceneIterator implements Iterator<String> {
|
||||
private TermsEnum termsEnum;
|
||||
private BytesRef pendingTerm;
|
||||
private final CharsRef spare = new CharsRef();
|
||||
|
||||
LuceneIterator() {
|
||||
try {
|
||||
final Terms terms = MultiFields.getTerms(reader, field);
|
||||
if (terms != null) {
|
||||
termsEnum = terms.iterator(null);
|
||||
pendingTerm = termsEnum.next();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
public final BytesRefIterator getWordsIterator() {
|
||||
|
||||
try {
|
||||
final Terms terms = MultiFields.getTerms(reader, field);
|
||||
if (terms != null) {
|
||||
return terms.iterator(null);
|
||||
} else {
|
||||
return BytesRefIterator.EMPTY_ITERATOR;
|
||||
}
|
||||
}
|
||||
|
||||
public String next() {
|
||||
if (pendingTerm == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
UnicodeUtil.UTF8toUTF16(pendingTerm, spare);
|
||||
|
||||
try {
|
||||
pendingTerm = termsEnum.next();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
return spare.toString();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return pendingTerm != null;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -21,6 +21,10 @@ package org.apache.lucene.search.spell;
|
|||
import java.util.Iterator;
|
||||
import java.io.*;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
|
||||
/**
|
||||
* Dictionary represented by a text file.
|
||||
|
@ -33,8 +37,6 @@ import java.io.*;
|
|||
public class PlainTextDictionary implements Dictionary {
|
||||
|
||||
private BufferedReader in;
|
||||
private String line;
|
||||
private boolean hasNextCalled;
|
||||
|
||||
public PlainTextDictionary(File file) throws FileNotFoundException {
|
||||
in = new BufferedReader(new FileReader(file));
|
||||
|
@ -51,31 +53,37 @@ public class PlainTextDictionary implements Dictionary {
|
|||
in = new BufferedReader(reader);
|
||||
}
|
||||
|
||||
public Iterator<String> getWordsIterator() {
|
||||
return new fileIterator();
|
||||
public BytesRefIterator getWordsIterator() {
|
||||
return new FileIterator();
|
||||
}
|
||||
|
||||
final class fileIterator implements Iterator<String> {
|
||||
public String next() {
|
||||
if (!hasNextCalled) {
|
||||
hasNext();
|
||||
final class FileIterator implements BytesRefIterator {
|
||||
private boolean done = false;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (done) {
|
||||
return null;
|
||||
}
|
||||
hasNextCalled = false;
|
||||
return line;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
hasNextCalled = true;
|
||||
boolean success = false;
|
||||
BytesRef result;
|
||||
try {
|
||||
line = in.readLine();
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
String line;
|
||||
if ((line = in.readLine()) != null) {
|
||||
spare.copyChars(line);
|
||||
result = spare;
|
||||
} else {
|
||||
done = true;
|
||||
IOUtils.close(in);
|
||||
result = null;
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(in);
|
||||
}
|
||||
}
|
||||
return (line != null) ? true : false;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,12 +17,17 @@ package org.apache.lucene.search.spell;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Marker interface to signal that elements coming from {@link Iterator}
|
||||
* come in ascending lexicographic order.
|
||||
*/
|
||||
public interface SortedIterator {
|
||||
|
||||
public Comparator<BytesRef> comparator();
|
||||
|
||||
}
|
||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
@ -510,20 +511,18 @@ public class SpellChecker implements java.io.Closeable {
|
|||
boolean isEmpty = termsEnums.isEmpty();
|
||||
|
||||
try {
|
||||
Iterator<String> iter = dict.getWordsIterator();
|
||||
BytesRef currentTerm = new BytesRef();
|
||||
BytesRefIterator iter = dict.getWordsIterator();
|
||||
BytesRef currentTerm;
|
||||
|
||||
terms: while (iter.hasNext()) {
|
||||
String word = iter.next();
|
||||
terms: while ((currentTerm = iter.next()) != null) {
|
||||
|
||||
String word = currentTerm.utf8ToString();
|
||||
int len = word.length();
|
||||
if (len < 3) {
|
||||
continue; // too short we bail but "too long" is fine...
|
||||
}
|
||||
|
||||
if (!isEmpty) {
|
||||
// we have a non-empty index, check if the term exists
|
||||
currentTerm.copyChars(word);
|
||||
for (TermsEnum te : termsEnums) {
|
||||
if (te.seekExact(currentTerm, false)) {
|
||||
continue terms;
|
||||
|
|
|
@ -17,16 +17,18 @@ package org.apache.lucene.search.spell;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
|
||||
public interface TermFreqIterator extends Iterator<String> {
|
||||
public interface TermFreqIterator extends BytesRefIterator {
|
||||
|
||||
public float freq();
|
||||
|
||||
public static class TermFreqIteratorWrapper implements TermFreqIterator {
|
||||
private Iterator<String> wrapped;
|
||||
private BytesRefIterator wrapped;
|
||||
|
||||
public TermFreqIteratorWrapper(Iterator<String> wrapped) {
|
||||
public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
|
||||
this.wrapped = wrapped;
|
||||
}
|
||||
|
||||
|
@ -34,17 +36,8 @@ public interface TermFreqIterator extends Iterator<String> {
|
|||
return 1.0f;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return wrapped.hasNext();
|
||||
public BytesRef next() throws IOException {
|
||||
return wrapped.next();
|
||||
}
|
||||
|
||||
public String next() {
|
||||
return wrapped.next().toString();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,65 +17,46 @@ package org.apache.lucene.search.suggest;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* This wrapper buffers incoming elements.
|
||||
*/
|
||||
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
||||
|
||||
/** Entry in the buffer. */
|
||||
public static final class Entry implements Comparable<Entry> {
|
||||
String word;
|
||||
float freq;
|
||||
|
||||
public Entry(String word, float freq) {
|
||||
this.word = word;
|
||||
this.freq = freq;
|
||||
protected BytesRefList entries = new BytesRefList();
|
||||
protected int curPos = -1;
|
||||
protected float[] freqs = new float[1];
|
||||
private final BytesRef spare = new BytesRef();
|
||||
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||
BytesRef spare;
|
||||
int freqIndex = 0;
|
||||
while((spare = source.next()) != null) {
|
||||
entries.append(spare);
|
||||
if (freqIndex >= freqs.length) {
|
||||
freqs = ArrayUtil.grow(freqs, freqs.length+1);
|
||||
}
|
||||
freqs[freqIndex++] = source.freq();
|
||||
}
|
||||
|
||||
public int compareTo(Entry o) {
|
||||
return word.compareTo(o.word);
|
||||
}
|
||||
}
|
||||
|
||||
protected ArrayList<Entry> entries = new ArrayList<Entry>();
|
||||
|
||||
protected int curPos;
|
||||
protected Entry curEntry;
|
||||
|
||||
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) {
|
||||
// read all source data into buffer
|
||||
while (source.hasNext()) {
|
||||
String w = source.next();
|
||||
Entry e = new Entry(w, source.freq());
|
||||
entries.add(e);
|
||||
}
|
||||
curPos = 0;
|
||||
|
||||
}
|
||||
|
||||
public float freq() {
|
||||
return curEntry.freq;
|
||||
return freqs[curPos];
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return curPos < entries.size();
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (++curPos < entries.size()) {
|
||||
entries.get(spare, curPos);
|
||||
return spare;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public String next() {
|
||||
curEntry = entries.get(curPos);
|
||||
curPos++;
|
||||
return curEntry.word;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("remove is not supported");
|
||||
}
|
||||
|
||||
public List<Entry> entries() {
|
||||
return entries;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.SorterTemplate;
|
||||
|
||||
final class BytesRefList {
|
||||
|
||||
private final ByteBlockPool pool;
|
||||
private int[] offsets = new int[1];
|
||||
private int currentElement = 0;
|
||||
private int currentOffset = 0;
|
||||
|
||||
public BytesRefList() {
|
||||
this(new ByteBlockPool(new ByteBlockPool.DirectAllocator()));
|
||||
}
|
||||
|
||||
public BytesRefList(ByteBlockPool pool) {
|
||||
this.pool = pool;
|
||||
pool.nextBuffer();
|
||||
}
|
||||
|
||||
public int append(BytesRef bytes) {
|
||||
if (currentElement >= offsets.length) {
|
||||
offsets = ArrayUtil.grow(offsets, offsets.length + 1);
|
||||
}
|
||||
pool.copy(bytes);
|
||||
offsets[currentElement++] = currentOffset;
|
||||
currentOffset += bytes.length;
|
||||
return currentElement;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return currentElement;
|
||||
}
|
||||
|
||||
public BytesRef get(BytesRef bytes, int pos) {
|
||||
if (currentElement > pos) {
|
||||
bytes.offset = offsets[pos];
|
||||
bytes.length = pos == currentElement - 1 ? currentOffset - bytes.offset
|
||||
: offsets[pos + 1] - bytes.offset;
|
||||
pool.copyFrom(bytes);
|
||||
return bytes;
|
||||
}
|
||||
throw new IndexOutOfBoundsException("index " + pos
|
||||
+ " must be less than the size: " + currentElement);
|
||||
|
||||
}
|
||||
|
||||
public BytesRefIterator iterator() {
|
||||
final int numElements = currentElement;
|
||||
|
||||
return new BytesRefIterator() {
|
||||
private final BytesRef spare = new BytesRef();
|
||||
private int pos = 0;
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (pos < numElements) {
|
||||
get(spare, pos++);
|
||||
return spare;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public int[] sort(final Comparator<BytesRef> comp) {
|
||||
final int[] orderdEntries = new int[size()];
|
||||
for (int i = 0; i < orderdEntries.length; i++) {
|
||||
orderdEntries[i] = i;
|
||||
}
|
||||
new SorterTemplate() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
final int o = orderdEntries[i];
|
||||
orderdEntries[i] = orderdEntries[j];
|
||||
orderdEntries[j] = o;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
final int ord1 = orderdEntries[i], ord2 = orderdEntries[j];
|
||||
return comp.compare(get(scratch1, ord1), get(scratch2, ord2));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
final int ord = orderdEntries[i];
|
||||
get(pivot, ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
final int ord = orderdEntries[j];
|
||||
return comp.compare(pivot, get(scratch2, ord));
|
||||
}
|
||||
|
||||
private final BytesRef pivot = new BytesRef(),
|
||||
scratch1 = new BytesRef(), scratch2 = new BytesRef();
|
||||
}.quickSort(0, size() - 1);
|
||||
return orderdEntries;
|
||||
}
|
||||
}
|
|
@ -22,6 +22,8 @@ import java.io.*;
|
|||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -36,7 +38,7 @@ public class FileDictionary implements Dictionary {
|
|||
|
||||
private BufferedReader in;
|
||||
private String line;
|
||||
private boolean hasNextCalled;
|
||||
private boolean done = false;
|
||||
|
||||
public FileDictionary(InputStream dictFile) {
|
||||
in = new BufferedReader(new InputStreamReader(dictFile));
|
||||
|
@ -50,45 +52,39 @@ public class FileDictionary implements Dictionary {
|
|||
}
|
||||
|
||||
public TermFreqIterator getWordsIterator() {
|
||||
return new fileIterator();
|
||||
return new FileIterator();
|
||||
}
|
||||
|
||||
final class fileIterator implements TermFreqIterator {
|
||||
final class FileIterator implements TermFreqIterator {
|
||||
private float curFreq;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
|
||||
public String next() {
|
||||
if (!hasNextCalled) {
|
||||
hasNext();
|
||||
}
|
||||
hasNextCalled = false;
|
||||
return line;
|
||||
}
|
||||
|
||||
|
||||
public float freq() {
|
||||
return curFreq;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
hasNextCalled = true;
|
||||
try {
|
||||
line = in.readLine();
|
||||
if (line != null) {
|
||||
String[] fields = line.split("\t");
|
||||
if (fields.length > 1) {
|
||||
curFreq = Float.parseFloat(fields[1]);
|
||||
line = fields[0];
|
||||
} else {
|
||||
curFreq = 1;
|
||||
}
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (done) {
|
||||
return null;
|
||||
}
|
||||
line = in.readLine();
|
||||
if (line != null) {
|
||||
String[] fields = line.split("\t");
|
||||
if (fields.length > 1) {
|
||||
curFreq = Float.parseFloat(fields[1]);
|
||||
spare.copyChars(fields[0]);
|
||||
} else {
|
||||
spare.copyChars(line);
|
||||
curFreq = 1;
|
||||
}
|
||||
return spare;
|
||||
} else {
|
||||
done = true;
|
||||
IOUtils.close(in);
|
||||
return null;
|
||||
}
|
||||
return (line != null) ? true : false;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,11 +19,13 @@ package org.apache.lucene.search.suggest;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
public abstract class Lookup {
|
||||
|
@ -77,7 +79,7 @@ public abstract class Lookup {
|
|||
* {@link UnsortedTermFreqIteratorWrapper} in such case.
|
||||
*/
|
||||
public void build(Dictionary dict) throws IOException {
|
||||
Iterator<String> it = dict.getWordsIterator();
|
||||
BytesRefIterator it = dict.getWordsIterator();
|
||||
TermFreqIterator tfit;
|
||||
if (it instanceof TermFreqIterator) {
|
||||
tfit = (TermFreqIterator)it;
|
||||
|
@ -89,6 +91,52 @@ public abstract class Lookup {
|
|||
|
||||
public abstract void build(TermFreqIterator tfit) throws IOException;
|
||||
|
||||
/**
|
||||
* Look up a key and return possible completion for this key.
|
||||
* @param key lookup key. Depending on the implementation this may be
|
||||
* a prefix, misspelling, or even infix.
|
||||
* @param onlyMorePopular return only more popular results
|
||||
* @param num maximum number of results to return
|
||||
* @return a list of possible completions, with their relative weight (e.g. popularity)
|
||||
*/
|
||||
// TODO: this should be a BytesRef API?
|
||||
public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
|
||||
|
||||
/**
|
||||
* Modify the lookup data by recording additional data. Optional operation.
|
||||
* @param key new lookup key
|
||||
* @param value value to associate with this key
|
||||
* @return true if new key is added, false if it already exists or operation
|
||||
* is not supported.
|
||||
*/
|
||||
// TODO: this should be a BytesRef API?
|
||||
public abstract boolean add(String key, Object value);
|
||||
|
||||
/**
|
||||
* Get value associated with a specific key.
|
||||
* @param key lookup key
|
||||
* @return associated value
|
||||
*/
|
||||
// TODO: this should be a BytesRef API?
|
||||
public abstract Object get(String key);
|
||||
|
||||
/**
|
||||
* Persist the constructed lookup data to an output stream. Optional operation.
|
||||
* @param output {@link OutputStream} to write the data to.
|
||||
* @return true if successful, false if unsuccessful or not supported.
|
||||
* @throws IOException when fatal IO error occurs.
|
||||
*/
|
||||
public abstract boolean store(OutputStream output) throws IOException;
|
||||
|
||||
/**
|
||||
* Discard current lookup data and load it from a previously saved copy.
|
||||
* Optional operation.
|
||||
* @param input the {@link InputStream} to load the lookup data.
|
||||
* @return true if completed successfully, false if unsuccessful or not supported.
|
||||
* @throws IOException when fatal IO error occurs.
|
||||
*/
|
||||
public abstract boolean load(InputStream input) throws IOException;
|
||||
|
||||
/**
|
||||
* Persist the constructed lookup data to a directory. Optional operation.
|
||||
* @param storeDir directory where data can be stored.
|
||||
|
@ -105,30 +153,4 @@ public abstract class Lookup {
|
|||
* @throws IOException when fatal IO error occurs.
|
||||
*/
|
||||
public abstract boolean load(File storeDir) throws IOException;
|
||||
|
||||
/**
|
||||
* Look up a key and return possible completion for this key.
|
||||
* @param key lookup key. Depending on the implementation this may be
|
||||
* a prefix, misspelling, or even infix.
|
||||
* @param onlyMorePopular return only more popular results
|
||||
* @param num maximum number of results to return
|
||||
* @return a list of possible completions, with their relative weight (e.g. popularity)
|
||||
*/
|
||||
public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
|
||||
|
||||
/**
|
||||
* Modify the lookup data by recording additional data. Optional operation.
|
||||
* @param key new lookup key
|
||||
* @param value value to associate with this key
|
||||
* @return true if new key is added, false if it already exists or operation
|
||||
* is not supported.
|
||||
*/
|
||||
public abstract boolean add(String key, Object value);
|
||||
|
||||
/**
|
||||
* Get value associated with a specific key.
|
||||
* @param key lookup key
|
||||
* @return associated value
|
||||
*/
|
||||
public abstract Object get(String key);
|
||||
}
|
||||
|
|
|
@ -17,10 +17,12 @@ package org.apache.lucene.search.suggest;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Collections;
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.search.spell.SortedIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* This wrapper buffers incoming elements and makes sure they are sorted in
|
||||
|
@ -28,8 +30,35 @@ import org.apache.lucene.search.spell.TermFreqIterator;
|
|||
*/
|
||||
public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator {
|
||||
|
||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source) {
|
||||
private final int[] sortedOrds;
|
||||
private int currentOrd = -1;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
private final Comparator<BytesRef> comp;
|
||||
|
||||
|
||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
|
||||
super(source);
|
||||
Collections.sort(entries);
|
||||
this.sortedOrds = entries.sort(comp);
|
||||
this.comp = comp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float freq() {
|
||||
return freqs[currentOrd];
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (++curPos < entries.size()) {
|
||||
return entries.get(spare, (currentOrd = sortedOrds[curPos]));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> comparator() {
|
||||
return comp;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -17,9 +17,11 @@ package org.apache.lucene.search.suggest;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Collections;
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* This wrapper buffers the incoming elements and makes sure they are in
|
||||
|
@ -27,8 +29,34 @@ import org.apache.lucene.search.spell.TermFreqIterator;
|
|||
*/
|
||||
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
|
||||
|
||||
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) {
|
||||
private final int[] ords;
|
||||
private int currentOrd = -1;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||
super(source);
|
||||
Collections.shuffle(entries);
|
||||
ords = new int[entries.size()];
|
||||
Random random = new Random();
|
||||
for (int i = 0; i < ords.length; i++) {
|
||||
ords[i] = i;
|
||||
}
|
||||
for (int i = 0; i < ords.length; i++) {
|
||||
int randomPosition = random.nextInt(ords.length);
|
||||
int temp = ords[i];
|
||||
ords[i] = ords[randomPosition];
|
||||
ords[randomPosition] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float freq() {
|
||||
return freqs[currentOrd];
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (++curPos < entries.size()) {
|
||||
return entries.get(spare, (currentOrd = ords[curPos]));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.lucene.search.suggest.fst;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -29,6 +31,8 @@ import org.apache.lucene.search.suggest.fst.Sort.SortInfo;
|
|||
import org.apache.lucene.search.suggest.tst.TSTLookup;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.NoOutputs;
|
||||
|
@ -158,20 +162,17 @@ public class FSTCompletionLookup extends Lookup {
|
|||
// If negative floats are allowed some trickery needs to be done to find their byte order.
|
||||
boolean success = false;
|
||||
try {
|
||||
BytesRef tmp1 = new BytesRef();
|
||||
byte [] buffer = new byte [0];
|
||||
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
|
||||
while (tfit.hasNext()) {
|
||||
String key = tfit.next();
|
||||
UnicodeUtil.UTF16toUTF8(key, 0, key.length(), tmp1);
|
||||
|
||||
if (tmp1.length + 4 >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, tmp1.length + 4);
|
||||
BytesRef spare;
|
||||
while ((spare = tfit.next()) != null) {
|
||||
if (spare.length + 4 >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, spare.length + 4);
|
||||
}
|
||||
|
||||
output.reset(buffer);
|
||||
output.writeInt(FloatMagic.toSortable(tfit.freq()));
|
||||
output.writeBytes(tmp1.bytes, tmp1.offset, tmp1.length);
|
||||
output.writeBytes(spare.bytes, spare.offset, spare.length);
|
||||
writer.write(buffer, 0, output.getPosition());
|
||||
}
|
||||
writer.close();
|
||||
|
@ -189,6 +190,7 @@ public class FSTCompletionLookup extends Lookup {
|
|||
int previousBucket = 0;
|
||||
float previousScore = 0;
|
||||
ByteArrayDataInput input = new ByteArrayDataInput();
|
||||
BytesRef tmp1 = new BytesRef();
|
||||
BytesRef tmp2 = new BytesRef();
|
||||
while (reader.read(tmp1)) {
|
||||
input.reset(tmp1.bytes);
|
||||
|
@ -293,4 +295,30 @@ public class FSTCompletionLookup extends Lookup {
|
|||
normalCompletion.getFST().save(new File(storeDir, FILENAME));
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean store(OutputStream output) throws IOException {
|
||||
|
||||
if (this.normalCompletion == null)
|
||||
return false;
|
||||
try {
|
||||
normalCompletion.getFST().save(new OutputStreamDataOutput(output));
|
||||
} finally {
|
||||
IOUtils.close(output);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean load(InputStream input) throws IOException {
|
||||
try {
|
||||
this.higherWeightsCompletion = new FSTCompletion(new FST<Object>(
|
||||
new InputStreamDataInput(input), NoOutputs.getSingleton()));
|
||||
this.normalCompletion = new FSTCompletion(
|
||||
higherWeightsCompletion.getFST(), false, exactMatchFirst);
|
||||
} finally {
|
||||
IOUtils.close(input);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.lucene.search.suggest.fst;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
@ -27,11 +29,12 @@ import org.apache.lucene.search.spell.TermFreqIterator;
|
|||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FST.Arc;
|
||||
|
@ -109,16 +112,14 @@ public class WFSTCompletionLookup extends Lookup {
|
|||
try {
|
||||
byte [] buffer = new byte [0];
|
||||
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
|
||||
while (iterator.hasNext()) {
|
||||
String key = iterator.next();
|
||||
UnicodeUtil.UTF16toUTF8(key, 0, key.length(), scratch);
|
||||
|
||||
if (scratch.length + 5 >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, scratch.length + 5);
|
||||
BytesRef spare;
|
||||
while ((spare = iterator.next()) != null) {
|
||||
if (spare.length + 5 >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, spare.length + 5);
|
||||
}
|
||||
|
||||
output.reset(buffer);
|
||||
output.writeBytes(scratch.bytes, scratch.offset, scratch.length);
|
||||
output.writeBytes(spare.bytes, spare.offset, spare.length);
|
||||
output.writeByte((byte)0); // separator: not used, just for sort order
|
||||
output.writeInt((int)encodeWeight(iterator.freq()));
|
||||
writer.write(buffer, 0, output.getPosition());
|
||||
|
@ -177,6 +178,26 @@ public class WFSTCompletionLookup extends Lookup {
|
|||
this.fst = FST.read(new File(storeDir, FILENAME), PositiveIntOutputs.getSingleton(true));
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean store(OutputStream output) throws IOException {
|
||||
try {
|
||||
fst.save(new OutputStreamDataOutput(output));
|
||||
} finally {
|
||||
IOUtils.close(output);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean load(InputStream input) throws IOException {
|
||||
try {
|
||||
this.fst = new FST<Long>(new InputStreamDataInput(input), PositiveIntOutputs.getSingleton(true));
|
||||
} finally {
|
||||
IOUtils.close(input);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
|
||||
|
|
|
@ -23,6 +23,8 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -31,6 +33,10 @@ import org.apache.lucene.search.spell.TermFreqIterator;
|
|||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
|
||||
import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
public class JaspellLookup extends Lookup {
|
||||
JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
|
||||
|
@ -41,17 +47,22 @@ public class JaspellLookup extends Lookup {
|
|||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
if (tfit instanceof SortedIterator) {
|
||||
// make sure it's unsorted
|
||||
// WTF - this could result in yet another sorted iteration....
|
||||
tfit = new UnsortedTermFreqIteratorWrapper(tfit);
|
||||
}
|
||||
trie = new JaspellTernarySearchTrie();
|
||||
trie.setMatchAlmostDiff(editDistance);
|
||||
while (tfit.hasNext()) {
|
||||
String key = tfit.next();
|
||||
BytesRef spare;
|
||||
final CharsRef charsSpare = new CharsRef();
|
||||
|
||||
while ((spare = tfit.next()) != null) {
|
||||
float freq = tfit.freq();
|
||||
if (key.length() == 0) {
|
||||
if (spare.length == 0) {
|
||||
continue;
|
||||
}
|
||||
trie.put(key, new Float(freq));
|
||||
charsSpare.grow(spare.length);
|
||||
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
|
||||
trie.put(charsSpare.toString(), new Float(freq));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -114,15 +125,7 @@ public class JaspellLookup extends Lookup {
|
|||
if (!data.exists() || !data.canRead()) {
|
||||
return false;
|
||||
}
|
||||
DataInputStream in = new DataInputStream(new FileInputStream(data));
|
||||
TSTNode root = trie.new TSTNode('\0', null);
|
||||
try {
|
||||
readRecursively(in, root);
|
||||
trie.setRoot(root);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
return true;
|
||||
return load(new FileInputStream(data));
|
||||
}
|
||||
|
||||
private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
|
||||
|
@ -153,19 +156,8 @@ public class JaspellLookup extends Lookup {
|
|||
if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
|
||||
return false;
|
||||
}
|
||||
TSTNode root = trie.getRoot();
|
||||
if (root == null) { // empty tree
|
||||
return false;
|
||||
}
|
||||
File data = new File(storeDir, FILENAME);
|
||||
DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
|
||||
try {
|
||||
writeRecursively(out, root);
|
||||
out.flush();
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
return true;
|
||||
return store(new FileOutputStream(data));
|
||||
}
|
||||
|
||||
private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
|
||||
|
@ -186,4 +178,33 @@ public class JaspellLookup extends Lookup {
|
|||
writeRecursively(out, node.relatives[TSTNode.EQKID]);
|
||||
writeRecursively(out, node.relatives[TSTNode.HIKID]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean store(OutputStream output) throws IOException {
|
||||
TSTNode root = trie.getRoot();
|
||||
if (root == null) { // empty tree
|
||||
return false;
|
||||
}
|
||||
DataOutputStream out = new DataOutputStream(output);
|
||||
try {
|
||||
writeRecursively(out, root);
|
||||
out.flush();
|
||||
} finally {
|
||||
IOUtils.close(out);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean load(InputStream input) throws IOException {
|
||||
DataInputStream in = new DataInputStream(input);
|
||||
TSTNode root = trie.new TSTNode('\0', null);
|
||||
try {
|
||||
readRecursively(in, root);
|
||||
trie.setRoot(root);
|
||||
} finally {
|
||||
IOUtils.close(in);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,8 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -30,6 +32,10 @@ import org.apache.lucene.search.suggest.Lookup;
|
|||
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
|
||||
import org.apache.lucene.search.spell.SortedIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
public class TSTLookup extends Lookup {
|
||||
TernaryTreeNode root = new TernaryTreeNode();
|
||||
|
@ -39,15 +45,19 @@ public class TSTLookup extends Lookup {
|
|||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
root = new TernaryTreeNode();
|
||||
// buffer first
|
||||
if (!(tfit instanceof SortedIterator)) {
|
||||
// make sure it's sorted
|
||||
tfit = new SortedTermFreqIteratorWrapper(tfit);
|
||||
if ((!(tfit instanceof SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
|
||||
// make sure it's sorted and the comparator uses UTF16 sort order
|
||||
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
|
||||
}
|
||||
|
||||
ArrayList<String> tokens = new ArrayList<String>();
|
||||
ArrayList<Float> vals = new ArrayList<Float>();
|
||||
while (tfit.hasNext()) {
|
||||
tokens.add(tfit.next());
|
||||
BytesRef spare;
|
||||
CharsRef charsSpare = new CharsRef();
|
||||
while ((spare = tfit.next()) != null) {
|
||||
charsSpare.grow(spare.length);
|
||||
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
|
||||
tokens.add(charsSpare.toString());
|
||||
vals.add(new Float(tfit.freq()));
|
||||
}
|
||||
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
|
||||
|
@ -113,14 +123,7 @@ public class TSTLookup extends Lookup {
|
|||
if (!data.exists() || !data.canRead()) {
|
||||
return false;
|
||||
}
|
||||
DataInputStream in = new DataInputStream(new FileInputStream(data));
|
||||
root = new TernaryTreeNode();
|
||||
try {
|
||||
readRecursively(in, root);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
return true;
|
||||
return load(new FileInputStream(data));
|
||||
}
|
||||
|
||||
// pre-order traversal
|
||||
|
@ -153,14 +156,7 @@ public class TSTLookup extends Lookup {
|
|||
return false;
|
||||
}
|
||||
File data = new File(storeDir, FILENAME);
|
||||
DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
|
||||
try {
|
||||
writeRecursively(out, root);
|
||||
out.flush();
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
return true;
|
||||
return store(new FileOutputStream(data));
|
||||
}
|
||||
|
||||
// pre-order traversal
|
||||
|
@ -188,4 +184,28 @@ public class TSTLookup extends Lookup {
|
|||
writeRecursively(out, node.hiKid);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean store(OutputStream output) throws IOException {
|
||||
DataOutputStream out = new DataOutputStream(output);
|
||||
try {
|
||||
writeRecursively(out, root);
|
||||
out.flush();
|
||||
} finally {
|
||||
IOUtils.close(output);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean load(InputStream input) throws IOException {
|
||||
DataInputStream in = new DataInputStream(input);
|
||||
root = new TernaryTreeNode();
|
||||
try {
|
||||
readRecursively(in, root);
|
||||
} finally {
|
||||
IOUtils.close(in);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,15 +18,17 @@ package org.apache.lucene.search.spell;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
|
@ -40,7 +42,8 @@ public class TestLuceneDictionary extends LuceneTestCase {
|
|||
|
||||
private IndexReader indexReader = null;
|
||||
private LuceneDictionary ld;
|
||||
private Iterator<String> it;
|
||||
private BytesRefIterator it;
|
||||
private BytesRef spare = new BytesRef();
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
|
@ -84,13 +87,12 @@ public class TestLuceneDictionary extends LuceneTestCase {
|
|||
|
||||
public void testFieldNonExistent() throws IOException {
|
||||
try {
|
||||
indexReader = IndexReader.open(store);
|
||||
indexReader = DirectoryReader.open(store);
|
||||
|
||||
ld = new LuceneDictionary(indexReader, "nonexistent_field");
|
||||
it = ld.getWordsIterator();
|
||||
|
||||
assertFalse("More elements than expected", it.hasNext());
|
||||
assertTrue("Nonexistent element is really null", it.next() == null);
|
||||
assertNull("More elements than expected", spare = it.next());
|
||||
} finally {
|
||||
if (indexReader != null) { indexReader.close(); }
|
||||
}
|
||||
|
@ -98,15 +100,13 @@ public class TestLuceneDictionary extends LuceneTestCase {
|
|||
|
||||
public void testFieldAaa() throws IOException {
|
||||
try {
|
||||
indexReader = IndexReader.open(store);
|
||||
indexReader = DirectoryReader.open(store);
|
||||
|
||||
ld = new LuceneDictionary(indexReader, "aaa");
|
||||
it = ld.getWordsIterator();
|
||||
|
||||
assertTrue("First element doesn't exist.", it.hasNext());
|
||||
assertTrue("First element isn't correct", it.next().equals("foo"));
|
||||
assertFalse("More elements than expected", it.hasNext());
|
||||
assertTrue("Nonexistent element is really null", it.next() == null);
|
||||
assertNotNull("First element doesn't exist.", spare = it.next());
|
||||
assertTrue("First element isn't correct", spare.utf8ToString().equals("foo"));
|
||||
assertNull("More elements than expected", it.next());
|
||||
} finally {
|
||||
if (indexReader != null) { indexReader.close(); }
|
||||
}
|
||||
|
@ -114,24 +114,22 @@ public class TestLuceneDictionary extends LuceneTestCase {
|
|||
|
||||
public void testFieldContents_1() throws IOException {
|
||||
try {
|
||||
indexReader = IndexReader.open(store);
|
||||
indexReader = DirectoryReader.open(store);
|
||||
|
||||
ld = new LuceneDictionary(indexReader, "contents");
|
||||
it = ld.getWordsIterator();
|
||||
|
||||
assertTrue("First element doesn't exist.", it.hasNext());
|
||||
assertTrue("First element isn't correct", it.next().equals("Jerry"));
|
||||
assertTrue("Second element doesn't exist.", it.hasNext());
|
||||
assertTrue("Second element isn't correct", it.next().equals("Tom"));
|
||||
assertFalse("More elements than expected", it.hasNext());
|
||||
assertTrue("Nonexistent element is really null", it.next() == null);
|
||||
assertNotNull("First element doesn't exist.", spare = it.next());
|
||||
assertTrue("First element isn't correct", spare.utf8ToString().equals("Jerry"));
|
||||
assertNotNull("Second element doesn't exist.", spare = it.next());
|
||||
assertTrue("Second element isn't correct", spare.utf8ToString().equals("Tom"));
|
||||
assertNull("More elements than expected", it.next());
|
||||
|
||||
ld = new LuceneDictionary(indexReader, "contents");
|
||||
it = ld.getWordsIterator();
|
||||
|
||||
int counter = 2;
|
||||
while (it.hasNext()) {
|
||||
it.next();
|
||||
while (it.next() != null) {
|
||||
counter--;
|
||||
}
|
||||
|
||||
|
@ -144,30 +142,15 @@ public class TestLuceneDictionary extends LuceneTestCase {
|
|||
|
||||
public void testFieldContents_2() throws IOException {
|
||||
try {
|
||||
indexReader = IndexReader.open(store);
|
||||
indexReader = DirectoryReader.open(store);
|
||||
|
||||
ld = new LuceneDictionary(indexReader, "contents");
|
||||
it = ld.getWordsIterator();
|
||||
|
||||
// hasNext() should have no side effects
|
||||
assertTrue("First element isn't were it should be.", it.hasNext());
|
||||
assertTrue("First element isn't were it should be.", it.hasNext());
|
||||
assertTrue("First element isn't were it should be.", it.hasNext());
|
||||
|
||||
// just iterate through words
|
||||
assertTrue("First element isn't correct", it.next().equals("Jerry"));
|
||||
assertTrue("Second element isn't correct", it.next().equals("Tom"));
|
||||
assertTrue("Nonexistent element is really null", it.next() == null);
|
||||
|
||||
// hasNext() should still have no side effects ...
|
||||
assertFalse("There should be any more elements", it.hasNext());
|
||||
assertFalse("There should be any more elements", it.hasNext());
|
||||
assertFalse("There should be any more elements", it.hasNext());
|
||||
|
||||
// .. and there are really no more words
|
||||
assertTrue("Nonexistent element is really null", it.next() == null);
|
||||
assertTrue("Nonexistent element is really null", it.next() == null);
|
||||
assertTrue("Nonexistent element is really null", it.next() == null);
|
||||
assertEquals("First element isn't correct", "Jerry", it.next().utf8ToString());
|
||||
assertEquals("Second element isn't correct", "Tom", it.next().utf8ToString());
|
||||
assertNull("Nonexistent element is really null", it.next());
|
||||
}
|
||||
finally {
|
||||
if (indexReader != null) { indexReader.close(); }
|
||||
|
@ -176,15 +159,14 @@ public class TestLuceneDictionary extends LuceneTestCase {
|
|||
|
||||
public void testFieldZzz() throws IOException {
|
||||
try {
|
||||
indexReader = IndexReader.open(store);
|
||||
indexReader = DirectoryReader.open(store);
|
||||
|
||||
ld = new LuceneDictionary(indexReader, "zzz");
|
||||
it = ld.getWordsIterator();
|
||||
|
||||
assertTrue("First element doesn't exist.", it.hasNext());
|
||||
assertTrue("First element isn't correct", it.next().equals("bar"));
|
||||
assertFalse("More elements than expected", it.hasNext());
|
||||
assertTrue("Nonexistent element is really null", it.next() == null);
|
||||
assertNotNull("First element doesn't exist.", spare = it.next());
|
||||
assertEquals("First element isn't correct", "bar", spare.utf8ToString());
|
||||
assertNull("More elements than expected", it.next());
|
||||
}
|
||||
finally {
|
||||
if (indexReader != null) { indexReader.close(); }
|
||||
|
@ -194,7 +176,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
|
|||
public void testSpellchecker() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
SpellChecker sc = new SpellChecker(dir);
|
||||
indexReader = IndexReader.open(store);
|
||||
indexReader = DirectoryReader.open(store);
|
||||
sc.indexDictionary(new LuceneDictionary(indexReader, "contents"), newIndexWriterConfig(TEST_VERSION_CURRENT, null), false);
|
||||
String[] suggestions = sc.suggestSimilar("Tam", 1);
|
||||
assertEquals(1, suggestions.length);
|
||||
|
|
|
@ -191,7 +191,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
|
||||
final List<String> input = new ArrayList<String>(benchmarkInput.size());
|
||||
for (TermFreq tf : benchmarkInput) {
|
||||
input.add(tf.term.substring(0, Math.min(tf.term.length(),
|
||||
input.add(tf.term.utf8ToString().substring(0, Math.min(tf.term.length,
|
||||
minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))));
|
||||
}
|
||||
|
||||
|
|
|
@ -75,11 +75,11 @@ public class PersistenceTest extends LuceneTestCase {
|
|||
// Assert validity.
|
||||
float previous = Float.NEGATIVE_INFINITY;
|
||||
for (TermFreq k : keys) {
|
||||
Float val = (Float) lookup.get(k.term);
|
||||
assertNotNull(k.term, val);
|
||||
Float val = (Float) lookup.get(k.term.utf8ToString());
|
||||
assertNotNull(k.term.utf8ToString(), val);
|
||||
|
||||
if (supportsExactWeights) {
|
||||
assertEquals(k.term, Float.valueOf(k.v), val);
|
||||
assertEquals(k.term.utf8ToString(), Float.valueOf(k.v), val);
|
||||
} else {
|
||||
assertTrue(val + ">=" + previous, val >= previous);
|
||||
previous = val.floatValue();
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package org.apache.lucene.search.suggest;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -18,10 +20,14 @@ package org.apache.lucene.search.suggest;
|
|||
*/
|
||||
|
||||
public final class TermFreq {
|
||||
public final String term;
|
||||
public final BytesRef term;
|
||||
public final float v;
|
||||
|
||||
public TermFreq(String term, float v) {
|
||||
this(new BytesRef(term), v);
|
||||
}
|
||||
|
||||
public TermFreq(BytesRef term, float v) {
|
||||
this.term = term;
|
||||
this.v = v;
|
||||
}
|
||||
|
|
|
@ -17,10 +17,12 @@ package org.apache.lucene.search.suggest;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* A {@link TermFreqIterator} over a sequence of {@link TermFreq}s.
|
||||
|
@ -28,6 +30,7 @@ import org.apache.lucene.search.spell.TermFreqIterator;
|
|||
public final class TermFreqArrayIterator implements TermFreqIterator {
|
||||
private final Iterator<TermFreq> i;
|
||||
private TermFreq current;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
|
||||
public TermFreqArrayIterator(Iterator<TermFreq> i) {
|
||||
this.i = i;
|
||||
|
@ -44,14 +47,14 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
|
|||
public float freq() {
|
||||
return current.v;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return i.hasNext();
|
||||
}
|
||||
|
||||
public String next() {
|
||||
return (current = i.next()).term;
|
||||
}
|
||||
|
||||
public void remove() { throw new UnsupportedOperationException(); }
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (i.hasNext()) {
|
||||
current = i.next();
|
||||
spare.copyBytes(current.term);
|
||||
return spare;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestBytesRefList extends LuceneTestCase {
|
||||
|
||||
public void testAppend() throws IOException {
|
||||
BytesRefList list = new BytesRefList();
|
||||
List<String> stringList = new ArrayList<String>();
|
||||
int entries = atLeast(500);
|
||||
BytesRef spare = new BytesRef();
|
||||
for (int i = 0; i < entries; i++) {
|
||||
String randomRealisticUnicodeString = _TestUtil
|
||||
.randomRealisticUnicodeString(random);
|
||||
spare.copyChars(randomRealisticUnicodeString);
|
||||
list.append(spare);
|
||||
stringList.add(randomRealisticUnicodeString);
|
||||
}
|
||||
for (int i = 0; i < entries; i++) {
|
||||
assertNotNull(list.get(spare, i));
|
||||
assertEquals("entry " + i + " doesn't match", stringList.get(i),
|
||||
spare.utf8ToString());
|
||||
}
|
||||
|
||||
// check random
|
||||
for (int i = 0; i < entries; i++) {
|
||||
int e = random.nextInt(entries);
|
||||
assertNotNull(list.get(spare, e));
|
||||
assertEquals("entry " + i + " doesn't match", stringList.get(e),
|
||||
spare.utf8ToString());
|
||||
}
|
||||
for (int i = 0; i < 2; i++) {
|
||||
|
||||
BytesRefIterator iterator = list.iterator();
|
||||
for (String string : stringList) {
|
||||
assertEquals(string, iterator.next().utf8ToString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testSort() {
|
||||
BytesRefList list = new BytesRefList();
|
||||
List<String> stringList = new ArrayList<String>();
|
||||
int entries = atLeast(500);
|
||||
BytesRef spare = new BytesRef();
|
||||
for (int i = 0; i < entries; i++) {
|
||||
String randomRealisticUnicodeString = _TestUtil.randomRealisticUnicodeString(random);
|
||||
spare.copyChars(randomRealisticUnicodeString);
|
||||
list.append(spare);
|
||||
stringList.add(randomRealisticUnicodeString);
|
||||
}
|
||||
Collections.sort(stringList);
|
||||
int[] sortedOrds = list.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
|
||||
for (int i = 0; i < entries; i++) {
|
||||
assertNotNull(list.get(spare, sortedOrds[i]));
|
||||
assertEquals("entry " + i + " doesn't match", stringList.get(i),
|
||||
spare.utf8ToString());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -40,7 +40,7 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
|
||||
FSTCompletionBuilder builder = new FSTCompletionBuilder();
|
||||
for (TermFreq tf : evalKeys()) {
|
||||
builder.add(new BytesRef(tf.term), (int) tf.v);
|
||||
builder.add(tf.term, (int) tf.v);
|
||||
}
|
||||
completion = builder.build();
|
||||
completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
|
||||
|
@ -167,7 +167,7 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
// are.
|
||||
Float previous = null;
|
||||
for (TermFreq tf : keys) {
|
||||
Float current = lookup.get(tf.term);
|
||||
Float current = lookup.get(tf.term.utf8ToString());
|
||||
if (previous != null) {
|
||||
assertEquals(previous, current);
|
||||
}
|
||||
|
@ -183,8 +183,8 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
lookup.build(new TermFreqArrayIterator(input));
|
||||
|
||||
for (TermFreq tf : input) {
|
||||
assertTrue("Not found: " + tf.term, lookup.get(tf.term) != null);
|
||||
assertEquals(tf.term, lookup.lookup(tf.term, true, 1).get(0).key);
|
||||
assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null);
|
||||
assertEquals(tf.term, lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key);
|
||||
}
|
||||
|
||||
List<LookupResult> result = lookup.lookup("wit", true, 5);
|
||||
|
@ -211,7 +211,7 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));
|
||||
|
||||
for (TermFreq tf : freqs) {
|
||||
final String term = tf.term;
|
||||
final String term = tf.term.utf8ToString();
|
||||
for (int i = 1; i < term.length(); i++) {
|
||||
String prefix = term.substring(0, i);
|
||||
for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
|
||||
|
|
Loading…
Reference in New Issue