LUCENE-3807: clean up TermFreqIterator API

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1291418 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2012-02-20 19:35:59 +00:00
parent 630addb415
commit 1860439f15
27 changed files with 753 additions and 390 deletions

View File

@ -23,6 +23,7 @@ import java.util.Comparator;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
/** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link
* #seekExact(BytesRef,boolean)}) or step through ({@link
@ -40,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
* of the <code>seek</code> methods.
*
* @lucene.experimental */
public abstract class TermsEnum {
public abstract class TermsEnum implements BytesRefIterator {
private AttributeSource atts = null;
@ -114,14 +115,6 @@ public abstract class TermsEnum {
}
}
/** Increments the enumeration to the next term.
* Returns the resulting term, or null if the end was
* hit (which means the enum is unpositioned). The
* returned BytesRef may be re-used across calls to next.
* After this method returns null, do not call it again:
* the results are undefined. */
public abstract BytesRef next() throws IOException;
/** Returns current term. Do not call this when the enum
* is unpositioned. */
public abstract BytesRef term() throws IOException;

View File

@ -280,6 +280,37 @@ public final class ByteBlockPool {
} while(true);
}
/**
 * Reads a slice of this pool back into the given {@link BytesRef}.
 * <p>
 * On entry, <code>bytes.offset</code> is interpreted as a pool-global start
 * offset (block index in the high bits, position inside the block in the low
 * bits) and <code>bytes.length</code> as the number of bytes to read. On
 * return, <code>bytes.bytes</code> holds the data starting at offset 0 and
 * <code>bytes.length</code> is unchanged.
 * <p>
 * A slice may span any number of consecutive blocks; each chunk is copied from
 * the block it lives in.
 *
 * @param bytes in: pool offset and length to read; out: the copied bytes
 * @return the same <code>bytes</code> instance, for call chaining
 */
public final BytesRef copyFrom(final BytesRef bytes) {
  final int length = bytes.length;
  final int offset = bytes.offset;
  // Reset the offset before growing; presumably grow() then guarantees room for
  // 'length' bytes starting at index 0.
  bytes.offset = 0;
  bytes.grow(length);
  int bufferIndex = offset >> BYTE_BLOCK_SHIFT;
  byte[] buffer = buffers[bufferIndex];
  int pos = offset & BYTE_BLOCK_MASK;
  int destUpto = 0;
  int remaining = length;
  while (true) {
    // Never read past the end of the current block.
    final int chunk = Math.min(remaining, BYTE_BLOCK_SIZE - pos);
    System.arraycopy(buffer, pos, bytes.bytes, destUpto, chunk);
    destUpto += chunk;
    remaining -= chunk;
    if (remaining == 0) {
      break;
    }
    // Continue at the start of the NEXT block (the index must advance).
    buffer = buffers[++bufferIndex];
    pos = 0;
  }
  bytes.offset = 0;
  bytes.length = length;
  return bytes;
}
/**
* Writes the pools content to the given {@link DataOutput}
*/

View File

@ -0,0 +1,52 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
/**
 * Minimal forward-only iteration contract over a sequence of {@link BytesRef}
 * values.
 */
public interface BytesRefIterator {

  /**
   * Advances to the next {@link BytesRef} and returns it, or returns
   * <code>null</code> once the iteration is exhausted. Implementations may
   * hand back the same BytesRef instance on every call, so callers must copy
   * the value if they need to keep it. Calling this method again after it has
   * returned <code>null</code> yields undefined results.
   *
   * @return the next {@link BytesRef}, or <code>null</code> when the end of
   *         the iteration is reached
   * @throws IOException if the underlying source fails while advancing
   */
  public BytesRef next() throws IOException;

  /** Shared iterator whose {@link #next()} always returns <code>null</code>. */
  public static final BytesRefIterator EMPTY_ITERATOR = new EmptyBytesRefIterator();

  /** Iterator over nothing: {@link #next()} returns <code>null</code> on every call. */
  public static final class EmptyBytesRefIterator implements BytesRefIterator {
    @Override
    public BytesRef next() throws IOException {
      return null;
    }
  }
}

View File

@ -16,7 +16,7 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
import java.util.Iterator;
import org.apache.lucene.util.BytesRefIterator;
/**
* A simple interface representing a Dictionary. A Dictionary
@ -30,5 +30,5 @@ public interface Dictionary {
* Return all words present in the dictionary
* @return Iterator
*/
Iterator<String> getWordsIterator();
BytesRefIterator getWordsIterator();
}

View File

@ -18,12 +18,14 @@
package org.apache.lucene.search.spell;
import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
@ -50,14 +52,13 @@ public class HighFrequencyDictionary implements Dictionary {
this.thresh = thresh;
}
public final Iterator<String> getWordsIterator() {
public final BytesRefIterator getWordsIterator() {
return new HighFrequencyIterator();
}
final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
private TermsEnum termsEnum;
private BytesRef actualTerm;
private boolean hasNextCalled;
private final BytesRef spare = new BytesRef();
private final TermsEnum termsEnum;
private int minNumDocs;
HighFrequencyIterator() {
@ -65,6 +66,8 @@ public class HighFrequencyDictionary implements Dictionary {
Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
termsEnum = terms.iterator(null);
} else {
termsEnum = null;
}
minNumDocs = (int)(thresh * (float)reader.numDocs());
} catch (IOException e) {
@ -83,57 +86,27 @@ public class HighFrequencyDictionary implements Dictionary {
throw new RuntimeException(ioe);
}
}
public String next() {
if (!hasNextCalled && !hasNext()) {
return null;
}
hasNextCalled = false;
if (actualTerm == null) {
return null;
} else {
UnicodeUtil.UTF8toUTF16(actualTerm, spare);
return spare.toString();
/**
 * Advances the wrapped {@link TermsEnum} to the next term whose document
 * frequency passes <code>isFrequent</code> and returns a copy of it.
 * Terms that do not pass are skipped rather than ending the iteration.
 *
 * @return the shared <code>spare</code> instance holding the next frequent
 *         term, or <code>null</code> when there is no terms enum or it is
 *         exhausted
 * @throws IOException if the terms enum fails while advancing
 */
@Override
public BytesRef next() throws IOException {
  if (termsEnum == null) {
    return null;
  }
  BytesRef next;
  while ((next = termsEnum.next()) != null) {
    if (isFrequent(termsEnum.docFreq())) {
      // Copy: the enum may re-use the returned BytesRef across calls.
      spare.copyBytes(next);
      return spare;
    }
  }
  return null;
}
public boolean hasNext() {
if (hasNextCalled) {
return actualTerm != null;
@Override
public Comparator<BytesRef> comparator() {
try {
return termsEnum.getComparator();
} catch (IOException e) {
throw new RuntimeException(e);
}
hasNextCalled = true;
if (termsEnum == null) {
return false;
}
while(true) {
try {
actualTerm = termsEnum.next();
} catch (IOException e) {
throw new RuntimeException(e);
}
// if there are no words return false
if (actualTerm == null) {
return false;
}
// got a valid term, does it pass the threshold?
try {
if (isFrequent(termsEnum.docFreq())) {
return true;
}
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
}
public void remove() {
throw new UnsupportedOperationException();
}
}
}

View File

@ -18,13 +18,7 @@ package org.apache.lucene.search.spell;
*/
import org.apache.lucene.index.IndexReader;
import java.util.Iterator;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
@ -49,50 +43,18 @@ public class LuceneDictionary implements Dictionary {
this.field = field;
}
public final Iterator<String> getWordsIterator() {
return new LuceneIterator();
}
final class LuceneIterator implements Iterator<String> {
private TermsEnum termsEnum;
private BytesRef pendingTerm;
private final CharsRef spare = new CharsRef();
LuceneIterator() {
try {
final Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
termsEnum = terms.iterator(null);
pendingTerm = termsEnum.next();
}
} catch (IOException e) {
throw new RuntimeException(e);
public final BytesRefIterator getWordsIterator() {
try {
final Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
return terms.iterator(null);
} else {
return BytesRefIterator.EMPTY_ITERATOR;
}
}
public String next() {
if (pendingTerm == null) {
return null;
}
UnicodeUtil.UTF8toUTF16(pendingTerm, spare);
try {
pendingTerm = termsEnum.next();
} catch (IOException e) {
throw new RuntimeException(e);
}
return spare.toString();
}
public boolean hasNext() {
return pendingTerm != null;
}
public void remove() {
throw new UnsupportedOperationException();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

View File

@ -21,6 +21,10 @@ package org.apache.lucene.search.spell;
import java.util.Iterator;
import java.io.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
/**
* Dictionary represented by a text file.
@ -33,8 +37,6 @@ import java.io.*;
public class PlainTextDictionary implements Dictionary {
private BufferedReader in;
private String line;
private boolean hasNextCalled;
public PlainTextDictionary(File file) throws FileNotFoundException {
in = new BufferedReader(new FileReader(file));
@ -51,31 +53,37 @@ public class PlainTextDictionary implements Dictionary {
in = new BufferedReader(reader);
}
public Iterator<String> getWordsIterator() {
return new fileIterator();
public BytesRefIterator getWordsIterator() {
return new FileIterator();
}
final class fileIterator implements Iterator<String> {
public String next() {
if (!hasNextCalled) {
hasNext();
final class FileIterator implements BytesRefIterator {
private boolean done = false;
private final BytesRef spare = new BytesRef();
@Override
public BytesRef next() throws IOException {
if (done) {
return null;
}
hasNextCalled = false;
return line;
}
public boolean hasNext() {
hasNextCalled = true;
boolean success = false;
BytesRef result;
try {
line = in.readLine();
} catch (IOException ex) {
throw new RuntimeException(ex);
String line;
if ((line = in.readLine()) != null) {
spare.copyChars(line);
result = spare;
} else {
done = true;
IOUtils.close(in);
result = null;
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(in);
}
}
return (line != null) ? true : false;
}
public void remove() {
throw new UnsupportedOperationException();
return result;
}
}

View File

@ -17,12 +17,17 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.util.BytesRef;
/**
 * Implemented by iterators whose elements come in ascending order.
 * Besides signalling sortedness, implementations expose a
 * {@link Comparator} through {@link #comparator()}.
 */
public interface SortedIterator {

  /**
   * Returns the comparator associated with this iterator.
   * NOTE(review): presumably the comparator that defines the iteration
   * order - confirm against implementations.
   */
  Comparator<BytesRef> comparator();
}

View File

@ -46,6 +46,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
@ -510,20 +511,18 @@ public class SpellChecker implements java.io.Closeable {
boolean isEmpty = termsEnums.isEmpty();
try {
Iterator<String> iter = dict.getWordsIterator();
BytesRef currentTerm = new BytesRef();
BytesRefIterator iter = dict.getWordsIterator();
BytesRef currentTerm;
terms: while (iter.hasNext()) {
String word = iter.next();
terms: while ((currentTerm = iter.next()) != null) {
String word = currentTerm.utf8ToString();
int len = word.length();
if (len < 3) {
continue; // too short we bail but "too long" is fine...
}
if (!isEmpty) {
// we have a non-empty index, check if the term exists
currentTerm.copyChars(word);
for (TermsEnum te : termsEnums) {
if (te.seekExact(currentTerm, false)) {
continue terms;

View File

@ -17,16 +17,18 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
import java.util.Iterator;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
public interface TermFreqIterator extends Iterator<String> {
public interface TermFreqIterator extends BytesRefIterator {
public float freq();
public static class TermFreqIteratorWrapper implements TermFreqIterator {
private Iterator<String> wrapped;
private BytesRefIterator wrapped;
public TermFreqIteratorWrapper(Iterator<String> wrapped) {
public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
this.wrapped = wrapped;
}
@ -34,17 +36,8 @@ public interface TermFreqIterator extends Iterator<String> {
return 1.0f;
}
public boolean hasNext() {
return wrapped.hasNext();
public BytesRef next() throws IOException {
return wrapped.next();
}
public String next() {
return wrapped.next().toString();
}
public void remove() {
throw new UnsupportedOperationException();
}
}
}

View File

@ -17,65 +17,46 @@ package org.apache.lucene.search.suggest;
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
/**
* This wrapper buffers incoming elements.
*/
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
/** Entry in the buffer. */
public static final class Entry implements Comparable<Entry> {
String word;
float freq;
public Entry(String word, float freq) {
this.word = word;
this.freq = freq;
protected BytesRefList entries = new BytesRefList();
protected int curPos = -1;
protected float[] freqs = new float[1];
private final BytesRef spare = new BytesRef();
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
BytesRef spare;
int freqIndex = 0;
while((spare = source.next()) != null) {
entries.append(spare);
if (freqIndex >= freqs.length) {
freqs = ArrayUtil.grow(freqs, freqs.length+1);
}
freqs[freqIndex++] = source.freq();
}
public int compareTo(Entry o) {
return word.compareTo(o.word);
}
}
protected ArrayList<Entry> entries = new ArrayList<Entry>();
protected int curPos;
protected Entry curEntry;
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) {
// read all source data into buffer
while (source.hasNext()) {
String w = source.next();
Entry e = new Entry(w, source.freq());
entries.add(e);
}
curPos = 0;
}
public float freq() {
return curEntry.freq;
return freqs[curPos];
}
public boolean hasNext() {
return curPos < entries.size();
@Override
public BytesRef next() throws IOException {
if (++curPos < entries.size()) {
entries.get(spare, curPos);
return spare;
}
return null;
}
public String next() {
curEntry = entries.get(curPos);
curPos++;
return curEntry.word;
}
public void remove() {
throw new UnsupportedOperationException("remove is not supported");
}
public List<Entry> entries() {
return entries;
}
}

View File

@ -0,0 +1,125 @@
package org.apache.lucene.search.suggest;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.SorterTemplate;
/**
 * Append-only list of {@link BytesRef} values. The bytes of all elements are
 * written back-to-back into a {@link ByteBlockPool}; only the start offset of
 * each element is kept in an <code>int[]</code>.
 */
final class BytesRefList {

  /** Backing storage for the bytes of every appended element. */
  private final ByteBlockPool pool;
  /** Start offset of element <code>i</code>, as a running total of appended lengths. */
  private int[] offsets = new int[1];
  /** Number of elements appended so far. */
  private int currentElement = 0;
  /** Total number of bytes appended so far. */
  private int currentOffset = 0;

  /** Creates a list backed by a fresh pool using a {@link ByteBlockPool.DirectAllocator}. */
  public BytesRefList() {
    this(new ByteBlockPool(new ByteBlockPool.DirectAllocator()));
  }

  /**
   * Creates a list backed by the given pool and requests its next buffer.
   *
   * @param pool the pool that receives the appended bytes
   */
  public BytesRefList(ByteBlockPool pool) {
    this.pool = pool;
    pool.nextBuffer();
  }

  /**
   * Copies the given bytes into the pool and records them as a new element.
   *
   * @param bytes the value to append
   * @return the number of elements in the list after the append
   */
  public int append(BytesRef bytes) {
    final int slot = currentElement;
    if (slot >= offsets.length) {
      offsets = ArrayUtil.grow(offsets, offsets.length + 1);
    }
    pool.copy(bytes);
    offsets[slot] = currentOffset;
    currentElement = slot + 1;
    currentOffset += bytes.length;
    return currentElement;
  }

  /** Returns the number of elements appended so far. */
  public int size() {
    return currentElement;
  }

  /**
   * Loads the element at <code>pos</code> into the given {@link BytesRef}.
   *
   * @param bytes the instance to fill; it is also the return value
   * @param pos index of the element to read
   * @return <code>bytes</code>, after the pool has copied the element into it
   * @throws IndexOutOfBoundsException if <code>pos</code> is not less than {@link #size()}
   */
  public BytesRef get(BytesRef bytes, int pos) {
    if (pos >= currentElement) {
      throw new IndexOutOfBoundsException("index " + pos
          + " must be less than the size: " + currentElement);
    }
    final int start = offsets[pos];
    // The last element ends where the next append would begin.
    final int end = (pos == currentElement - 1) ? currentOffset : offsets[pos + 1];
    bytes.offset = start;
    bytes.length = end - start;
    pool.copyFrom(bytes);
    return bytes;
  }

  /**
   * Returns an iterator over the elements present at the time of this call,
   * in insertion order. The iterator re-uses one {@link BytesRef} instance
   * for every returned element.
   */
  public BytesRefIterator iterator() {
    final int limit = currentElement;
    return new BytesRefIterator() {
      private final BytesRef scratch = new BytesRef();
      private int cursor = 0;

      @Override
      public BytesRef next() throws IOException {
        if (cursor >= limit) {
          return null;
        }
        get(scratch, cursor++);
        return scratch;
      }
    };
  }

  /**
   * Computes a sorted view of this list without moving any bytes.
   *
   * @param comp comparator used to order the elements
   * @return an array of element indices, ordered according to <code>comp</code>
   */
  public int[] sort(final Comparator<BytesRef> comp) {
    final int count = size();
    final int[] ords = new int[count];
    for (int ord = 0; ord < count; ord++) {
      ords[ord] = ord;
    }
    final SorterTemplate sorter = new SorterTemplate() {
      private final BytesRef pivot = new BytesRef();
      private final BytesRef scratch1 = new BytesRef();
      private final BytesRef scratch2 = new BytesRef();

      @Override
      protected void swap(int i, int j) {
        final int tmp = ords[i];
        ords[i] = ords[j];
        ords[j] = tmp;
      }

      @Override
      protected int compare(int i, int j) {
        final BytesRef left = get(scratch1, ords[i]);
        final BytesRef right = get(scratch2, ords[j]);
        return comp.compare(left, right);
      }

      @Override
      protected void setPivot(int i) {
        get(pivot, ords[i]);
      }

      @Override
      protected int comparePivot(int j) {
        return comp.compare(pivot, get(scratch2, ords[j]));
      }
    };
    sorter.quickSort(0, count - 1);
    return ords;
  }
}

View File

@ -22,6 +22,8 @@ import java.io.*;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
@ -36,7 +38,7 @@ public class FileDictionary implements Dictionary {
private BufferedReader in;
private String line;
private boolean hasNextCalled;
private boolean done = false;
public FileDictionary(InputStream dictFile) {
in = new BufferedReader(new InputStreamReader(dictFile));
@ -50,45 +52,39 @@ public class FileDictionary implements Dictionary {
}
public TermFreqIterator getWordsIterator() {
return new fileIterator();
return new FileIterator();
}
final class fileIterator implements TermFreqIterator {
final class FileIterator implements TermFreqIterator {
private float curFreq;
private final BytesRef spare = new BytesRef();
public String next() {
if (!hasNextCalled) {
hasNext();
}
hasNextCalled = false;
return line;
}
public float freq() {
return curFreq;
}
public boolean hasNext() {
hasNextCalled = true;
try {
line = in.readLine();
if (line != null) {
String[] fields = line.split("\t");
if (fields.length > 1) {
curFreq = Float.parseFloat(fields[1]);
line = fields[0];
} else {
curFreq = 1;
}
}
} catch (IOException ex) {
throw new RuntimeException(ex);
@Override
public BytesRef next() throws IOException {
if (done) {
return null;
}
line = in.readLine();
if (line != null) {
String[] fields = line.split("\t");
if (fields.length > 1) {
curFreq = Float.parseFloat(fields[1]);
spare.copyChars(fields[0]);
} else {
spare.copyChars(line);
curFreq = 1;
}
return spare;
} else {
done = true;
IOUtils.close(in);
return null;
}
return (line != null) ? true : false;
}
public void remove() {
throw new UnsupportedOperationException();
}
}

View File

@ -19,11 +19,13 @@ package org.apache.lucene.search.suggest;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
public abstract class Lookup {
@ -77,7 +79,7 @@ public abstract class Lookup {
* {@link UnsortedTermFreqIteratorWrapper} in such case.
*/
public void build(Dictionary dict) throws IOException {
Iterator<String> it = dict.getWordsIterator();
BytesRefIterator it = dict.getWordsIterator();
TermFreqIterator tfit;
if (it instanceof TermFreqIterator) {
tfit = (TermFreqIterator)it;
@ -89,6 +91,52 @@ public abstract class Lookup {
public abstract void build(TermFreqIterator tfit) throws IOException;
/**
* Look up a key and return possible completion for this key.
* @param key lookup key. Depending on the implementation this may be
* a prefix, misspelling, or even infix.
* @param onlyMorePopular return only more popular results
* @param num maximum number of results to return
* @return a list of possible completions, with their relative weight (e.g. popularity)
*/
// TODO: this should be a BytesRef API?
public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
/**
* Modify the lookup data by recording additional data. Optional operation.
* @param key new lookup key
* @param value value to associate with this key
* @return true if new key is added, false if it already exists or operation
* is not supported.
*/
// TODO: this should be a BytesRef API?
public abstract boolean add(String key, Object value);
/**
* Get value associated with a specific key.
* @param key lookup key
* @return associated value
*/
// TODO: this should be a BytesRef API?
public abstract Object get(String key);
/**
* Persist the constructed lookup data to a directory. Optional operation.
* @param output {@link OutputStream} to write the data to.
* @return true if successful, false if unsuccessful or not supported.
* @throws IOException when fatal IO error occurs.
*/
public abstract boolean store(OutputStream output) throws IOException;
/**
* Discard current lookup data and load it from a previously saved copy.
* Optional operation.
* @param input the {@link InputStream} to load the lookup data.
* @return true if completed successfully, false if unsuccessful or not supported.
* @throws IOException when fatal IO error occurs.
*/
public abstract boolean load(InputStream input) throws IOException;
/**
* Persist the constructed lookup data to a directory. Optional operation.
* @param storeDir directory where data can be stored.
@ -105,30 +153,4 @@ public abstract class Lookup {
* @throws IOException when fatal IO error occurs.
*/
public abstract boolean load(File storeDir) throws IOException;
/**
* Look up a key and return possible completion for this key.
* @param key lookup key. Depending on the implementation this may be
* a prefix, misspelling, or even infix.
* @param onlyMorePopular return only more popular results
* @param num maximum number of results to return
* @return a list of possible completions, with their relative weight (e.g. popularity)
*/
public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
/**
* Modify the lookup data by recording additional data. Optional operation.
* @param key new lookup key
* @param value value to associate with this key
* @return true if new key is added, false if it already exists or operation
* is not supported.
*/
public abstract boolean add(String key, Object value);
/**
* Get value associated with a specific key.
* @param key lookup key
* @return associated value
*/
public abstract Object get(String key);
}

View File

@ -17,10 +17,12 @@ package org.apache.lucene.search.suggest;
* limitations under the License.
*/
import java.util.Collections;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
/**
* This wrapper buffers incoming elements and makes sure they are sorted in
@ -28,8 +30,35 @@ import org.apache.lucene.search.spell.TermFreqIterator;
*/
public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator {
public SortedTermFreqIteratorWrapper(TermFreqIterator source) {
private final int[] sortedOrds;
private int currentOrd = -1;
private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comp;
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
super(source);
Collections.sort(entries);
this.sortedOrds = entries.sort(comp);
this.comp = comp;
}
/**
 * Returns the buffered frequency of the entry most recently returned by
 * {@link #next()}. Must not be called before the first call to next():
 * currentOrd is still -1 then.
 */
@Override
public float freq() {
return freqs[currentOrd];
}
/**
 * Moves to the next buffered entry in sorted order and loads it into the
 * shared <code>spare</code> instance.
 *
 * @return the next entry, or <code>null</code> once all buffered entries
 *         have been returned
 */
@Override
public BytesRef next() throws IOException {
  curPos++;
  if (curPos >= entries.size()) {
    return null;
  }
  // Remember which buffered entry is current so freq() can look it up.
  currentOrd = sortedOrds[curPos];
  return entries.get(spare, currentOrd);
}
/**
 * Returns the comparator that was passed to the constructor and used to
 * sort the buffered entries.
 */
@Override
public Comparator<BytesRef> comparator() {
return comp;
}
}

View File

@ -17,9 +17,11 @@ package org.apache.lucene.search.suggest;
* limitations under the License.
*/
import java.util.Collections;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
/**
* This wrapper buffers the incoming elements and makes sure they are in
@ -27,8 +29,34 @@ import org.apache.lucene.search.spell.TermFreqIterator;
*/
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) {
private final int[] ords;
private int currentOrd = -1;
private final BytesRef spare = new BytesRef();
/**
 * Buffers every entry of <code>source</code> and prepares a random order in
 * which the buffered entries will be returned.
 *
 * @param source the iterator to drain into the buffer
 * @throws IOException if reading from <code>source</code> fails
 */
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
  super(source);
  ords = new int[entries.size()];
  Random random = new Random();
  for (int i = 0; i < ords.length; i++) {
    ords[i] = i;
  }
  // Fisher-Yates shuffle: pick the partner only from the not-yet-fixed prefix
  // so that every permutation is equally likely.
  for (int i = ords.length - 1; i > 0; i--) {
    int randomPosition = random.nextInt(i + 1);
    int temp = ords[i];
    ords[i] = ords[randomPosition];
    ords[randomPosition] = temp;
  }
}
/**
 * Returns the buffered frequency of the entry most recently returned by
 * {@link #next()}. Must not be called before the first call to next():
 * currentOrd is still -1 then.
 */
@Override
public float freq() {
return freqs[currentOrd];
}
/**
 * Moves to the next buffered entry in the shuffled order and loads it into
 * the shared <code>spare</code> instance.
 *
 * @return the next entry, or <code>null</code> once all buffered entries
 *         have been returned
 */
@Override
public BytesRef next() throws IOException {
  curPos++;
  if (curPos >= entries.size()) {
    return null;
  }
  // Remember which buffered entry is current so freq() can look it up.
  currentOrd = ords[curPos];
  return entries.get(spare, currentOrd);
}
}

View File

@ -19,6 +19,8 @@ package org.apache.lucene.search.suggest.fst;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
@ -29,6 +31,8 @@ import org.apache.lucene.search.suggest.fst.Sort.SortInfo;
import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.*;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
@ -158,20 +162,17 @@ public class FSTCompletionLookup extends Lookup {
// If negative floats are allowed some trickery needs to be done to find their byte order.
boolean success = false;
try {
BytesRef tmp1 = new BytesRef();
byte [] buffer = new byte [0];
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
while (tfit.hasNext()) {
String key = tfit.next();
UnicodeUtil.UTF16toUTF8(key, 0, key.length(), tmp1);
if (tmp1.length + 4 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, tmp1.length + 4);
BytesRef spare;
while ((spare = tfit.next()) != null) {
if (spare.length + 4 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 4);
}
output.reset(buffer);
output.writeInt(FloatMagic.toSortable(tfit.freq()));
output.writeBytes(tmp1.bytes, tmp1.offset, tmp1.length);
output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition());
}
writer.close();
@ -189,6 +190,7 @@ public class FSTCompletionLookup extends Lookup {
int previousBucket = 0;
float previousScore = 0;
ByteArrayDataInput input = new ByteArrayDataInput();
BytesRef tmp1 = new BytesRef();
BytesRef tmp2 = new BytesRef();
while (reader.read(tmp1)) {
input.reset(tmp1.bytes);
@ -293,4 +295,30 @@ public class FSTCompletionLookup extends Lookup {
normalCompletion.getFST().save(new File(storeDir, FILENAME));
return true;
}
/**
 * Writes the FST of the normal completion to the given stream.
 * The stream is closed before this method returns on every path, including
 * when there is nothing to store.
 *
 * @param output the stream to write to; closed by this method
 * @return <code>false</code> if no completion has been built or loaded yet,
 *         <code>true</code> once the FST has been written
 * @throws IOException if writing or closing the stream fails
 */
@Override
public synchronized boolean store(OutputStream output) throws IOException {
  try {
    if (this.normalCompletion == null) {
      // Nothing to persist; the finally block still releases the stream.
      return false;
    }
    normalCompletion.getFST().save(new OutputStreamDataOutput(output));
  } finally {
    IOUtils.close(output);
  }
  return true;
}
/**
 * Replaces both completions with ones backed by an FST read from the given
 * stream. The stream is closed before this method returns, whether or not
 * reading succeeded.
 *
 * @param input the stream to read the FST from; closed by this method
 * @return always <code>true</code> when no exception is thrown
 * @throws IOException if reading or closing the stream fails
 */
@Override
public synchronized boolean load(InputStream input) throws IOException {
  try {
    final FST<Object> loaded =
        new FST<Object>(new InputStreamDataInput(input), NoOutputs.getSingleton());
    this.higherWeightsCompletion = new FSTCompletion(loaded);
    // The normal completion is built from the FST of the higher-weights one.
    this.normalCompletion =
        new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);
  } finally {
    IOUtils.close(input);
  }
  return true;
}
}

View File

@ -19,6 +19,8 @@ package org.apache.lucene.search.suggest.fst;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -27,11 +29,12 @@ import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
@ -109,16 +112,14 @@ public class WFSTCompletionLookup extends Lookup {
try {
byte [] buffer = new byte [0];
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
while (iterator.hasNext()) {
String key = iterator.next();
UnicodeUtil.UTF16toUTF8(key, 0, key.length(), scratch);
if (scratch.length + 5 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, scratch.length + 5);
BytesRef spare;
while ((spare = iterator.next()) != null) {
if (spare.length + 5 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 5);
}
output.reset(buffer);
output.writeBytes(scratch.bytes, scratch.offset, scratch.length);
output.writeBytes(spare.bytes, spare.offset, spare.length);
output.writeByte((byte)0); // separator: not used, just for sort order
output.writeInt((int)encodeWeight(iterator.freq()));
writer.write(buffer, 0, output.getPosition());
@ -177,6 +178,26 @@ public class WFSTCompletionLookup extends Lookup {
this.fst = FST.read(new File(storeDir, FILENAME), PositiveIntOutputs.getSingleton(true));
return true;
}
/**
 * Writes the FST to the given stream. The stream is closed before this
 * method returns on every path.
 *
 * @param output the stream to write to; closed by this method
 * @return <code>false</code> if there is no FST to store,
 *         <code>true</code> once the FST has been written
 * @throws IOException if writing or closing the stream fails
 */
@Override
public boolean store(OutputStream output) throws IOException {
  try {
    if (fst == null) {
      // Report "nothing stored" instead of failing with a NullPointerException.
      return false;
    }
    fst.save(new OutputStreamDataOutput(output));
  } finally {
    IOUtils.close(output);
  }
  return true;
}
/**
 * Replaces the current FST with one read from the given stream. The stream
 * is closed before this method returns, whether or not reading succeeded.
 *
 * @param input the stream to read the FST from; closed by this method
 * @return always <code>true</code> when no exception is thrown
 * @throws IOException if reading or closing the stream fails
 */
@Override
public boolean load(InputStream input) throws IOException {
  try {
    final InputStreamDataInput dataInput = new InputStreamDataInput(input);
    this.fst = new FST<Long>(dataInput, PositiveIntOutputs.getSingleton(true));
  } finally {
    IOUtils.close(input);
  }
  return true;
}
@Override
public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {

View File

@ -23,6 +23,8 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
@ -31,6 +33,10 @@ import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnicodeUtil;
public class JaspellLookup extends Lookup {
JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
@ -41,17 +47,22 @@ public class JaspellLookup extends Lookup {
public void build(TermFreqIterator tfit) throws IOException {
if (tfit instanceof SortedIterator) {
// make sure it's unsorted
// NOTE: this could result in yet another sorted iteration.
tfit = new UnsortedTermFreqIteratorWrapper(tfit);
}
trie = new JaspellTernarySearchTrie();
trie.setMatchAlmostDiff(editDistance);
while (tfit.hasNext()) {
String key = tfit.next();
BytesRef spare;
final CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) {
float freq = tfit.freq();
if (key.length() == 0) {
if (spare.length == 0) {
continue;
}
trie.put(key, new Float(freq));
charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
trie.put(charsSpare.toString(), new Float(freq));
}
}
@ -114,15 +125,7 @@ public class JaspellLookup extends Lookup {
if (!data.exists() || !data.canRead()) {
return false;
}
DataInputStream in = new DataInputStream(new FileInputStream(data));
TSTNode root = trie.new TSTNode('\0', null);
try {
readRecursively(in, root);
trie.setRoot(root);
} finally {
in.close();
}
return true;
return load(new FileInputStream(data));
}
private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
@ -153,19 +156,8 @@ public class JaspellLookup extends Lookup {
if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
return false;
}
TSTNode root = trie.getRoot();
if (root == null) { // empty tree
return false;
}
File data = new File(storeDir, FILENAME);
DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
try {
writeRecursively(out, root);
out.flush();
} finally {
out.close();
}
return true;
return store(new FileOutputStream(data));
}
private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
@ -186,4 +178,33 @@ public class JaspellLookup extends Lookup {
writeRecursively(out, node.relatives[TSTNode.EQKID]);
writeRecursively(out, node.relatives[TSTNode.HIKID]);
}
/**
 * Writes the trie to {@code output} by serializing it recursively from the root.
 * The stream is closed before this method returns on every path, including
 * the empty-trie case, so callers that hand in a freshly opened stream do not
 * leak it.
 *
 * @param output destination stream; always closed by this method
 * @return {@code false} if the trie has no root (nothing is written),
 *         {@code true} once the trie has been written and flushed
 * @throws IOException if writing or closing the stream fails
 */
@Override
public boolean store(OutputStream output) throws IOException {
  DataOutputStream out = new DataOutputStream(output);
  try {
    TSTNode root = trie.getRoot();
    if (root == null) { // empty tree
      // Fall through to the finally block so the stream is still closed.
      return false;
    }
    writeRecursively(out, root);
    out.flush();
  } finally {
    IOUtils.close(out);
  }
  return true;
}
/**
 * Rebuilds the trie from {@code input}.
 * Nodes are read into a newly created root, which is installed on the trie
 * only after the read completes. The stream is closed before this method returns.
 *
 * @param input source stream; always closed by this method
 * @return {@code true} once the trie has been read and its root replaced
 * @throws IOException if reading or closing the stream fails
 */
@Override
public boolean load(InputStream input) throws IOException {
  final TSTNode freshRoot = trie.new TSTNode('\0', null);
  final DataInputStream dataIn = new DataInputStream(input);
  try {
    readRecursively(dataIn, freshRoot);
    trie.setRoot(freshRoot);
  } finally {
    IOUtils.close(dataIn);
  }
  return true;
}
}

View File

@ -23,6 +23,8 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
@ -30,6 +32,10 @@ import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnicodeUtil;
public class TSTLookup extends Lookup {
TernaryTreeNode root = new TernaryTreeNode();
@ -39,15 +45,19 @@ public class TSTLookup extends Lookup {
public void build(TermFreqIterator tfit) throws IOException {
root = new TernaryTreeNode();
// buffer first
if (!(tfit instanceof SortedIterator)) {
// make sure it's sorted
tfit = new SortedTermFreqIteratorWrapper(tfit);
if ((!(tfit instanceof SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
// make sure it's sorted and the comparator uses UTF16 sort order
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
}
ArrayList<String> tokens = new ArrayList<String>();
ArrayList<Float> vals = new ArrayList<Float>();
while (tfit.hasNext()) {
tokens.add(tfit.next());
BytesRef spare;
CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) {
charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
tokens.add(charsSpare.toString());
vals.add(new Float(tfit.freq()));
}
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
@ -113,14 +123,7 @@ public class TSTLookup extends Lookup {
if (!data.exists() || !data.canRead()) {
return false;
}
DataInputStream in = new DataInputStream(new FileInputStream(data));
root = new TernaryTreeNode();
try {
readRecursively(in, root);
} finally {
in.close();
}
return true;
return load(new FileInputStream(data));
}
// pre-order traversal
@ -153,14 +156,7 @@ public class TSTLookup extends Lookup {
return false;
}
File data = new File(storeDir, FILENAME);
DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
try {
writeRecursively(out, root);
out.flush();
} finally {
out.close();
}
return true;
return store(new FileOutputStream(data));
}
// pre-order traversal
@ -188,4 +184,28 @@ public class TSTLookup extends Lookup {
writeRecursively(out, node.hiKid);
}
}
/**
 * Writes the ternary tree to {@code output}, starting at the current root.
 * The stream is closed before this method returns, even when writing fails.
 *
 * @param output destination stream; always closed by this method
 * @return {@code true} once the tree has been written and flushed
 * @throws IOException if writing or closing the stream fails
 */
@Override
public synchronized boolean store(OutputStream output) throws IOException {
  DataOutputStream out = new DataOutputStream(output);
  try {
    writeRecursively(out, root);
    out.flush();
  } finally {
    // Close the wrapper (which closes the underlying stream), mirroring load().
    IOUtils.close(out);
  }
  return true;
}
/**
 * Rebuilds the ternary tree from {@code input}.
 * Nodes are read into a detached root that replaces the current one only
 * after the read succeeds, so a failed or truncated read leaves the
 * previously loaded tree in place. The stream is closed before this method
 * returns.
 *
 * @param input source stream; always closed by this method
 * @return {@code true} once the tree has been read and installed
 * @throws IOException if reading or closing the stream fails
 */
@Override
public synchronized boolean load(InputStream input) throws IOException {
  DataInputStream in = new DataInputStream(input);
  TernaryTreeNode loaded = new TernaryTreeNode();
  try {
    readRecursively(in, loaded);
    // Publish only a completely read tree.
    root = loaded;
  } finally {
    IOUtils.close(in);
  }
  return true;
}
}

View File

@ -18,15 +18,17 @@ package org.apache.lucene.search.spell;
*/
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase;
/**
@ -40,7 +42,8 @@ public class TestLuceneDictionary extends LuceneTestCase {
private IndexReader indexReader = null;
private LuceneDictionary ld;
private Iterator<String> it;
private BytesRefIterator it;
private BytesRef spare = new BytesRef();
@Override
public void setUp() throws Exception {
@ -84,13 +87,12 @@ public class TestLuceneDictionary extends LuceneTestCase {
public void testFieldNonExistent() throws IOException {
try {
indexReader = IndexReader.open(store);
indexReader = DirectoryReader.open(store);
ld = new LuceneDictionary(indexReader, "nonexistent_field");
it = ld.getWordsIterator();
assertFalse("More elements than expected", it.hasNext());
assertTrue("Nonexistent element is really null", it.next() == null);
assertNull("More elements than expected", spare = it.next());
} finally {
if (indexReader != null) { indexReader.close(); }
}
@ -98,15 +100,13 @@ public class TestLuceneDictionary extends LuceneTestCase {
public void testFieldAaa() throws IOException {
try {
indexReader = IndexReader.open(store);
indexReader = DirectoryReader.open(store);
ld = new LuceneDictionary(indexReader, "aaa");
it = ld.getWordsIterator();
assertTrue("First element doesn't exist.", it.hasNext());
assertTrue("First element isn't correct", it.next().equals("foo"));
assertFalse("More elements than expected", it.hasNext());
assertTrue("Nonexistent element is really null", it.next() == null);
assertNotNull("First element doesn't exist.", spare = it.next());
assertTrue("First element isn't correct", spare.utf8ToString().equals("foo"));
assertNull("More elements than expected", it.next());
} finally {
if (indexReader != null) { indexReader.close(); }
}
@ -114,24 +114,22 @@ public class TestLuceneDictionary extends LuceneTestCase {
public void testFieldContents_1() throws IOException {
try {
indexReader = IndexReader.open(store);
indexReader = DirectoryReader.open(store);
ld = new LuceneDictionary(indexReader, "contents");
it = ld.getWordsIterator();
assertTrue("First element doesn't exist.", it.hasNext());
assertTrue("First element isn't correct", it.next().equals("Jerry"));
assertTrue("Second element doesn't exist.", it.hasNext());
assertTrue("Second element isn't correct", it.next().equals("Tom"));
assertFalse("More elements than expected", it.hasNext());
assertTrue("Nonexistent element is really null", it.next() == null);
assertNotNull("First element doesn't exist.", spare = it.next());
assertTrue("First element isn't correct", spare.utf8ToString().equals("Jerry"));
assertNotNull("Second element doesn't exist.", spare = it.next());
assertTrue("Second element isn't correct", spare.utf8ToString().equals("Tom"));
assertNull("More elements than expected", it.next());
ld = new LuceneDictionary(indexReader, "contents");
it = ld.getWordsIterator();
int counter = 2;
while (it.hasNext()) {
it.next();
while (it.next() != null) {
counter--;
}
@ -144,30 +142,15 @@ public class TestLuceneDictionary extends LuceneTestCase {
public void testFieldContents_2() throws IOException {
try {
indexReader = IndexReader.open(store);
indexReader = DirectoryReader.open(store);
ld = new LuceneDictionary(indexReader, "contents");
it = ld.getWordsIterator();
// hasNext() should have no side effects
assertTrue("First element isn't were it should be.", it.hasNext());
assertTrue("First element isn't were it should be.", it.hasNext());
assertTrue("First element isn't were it should be.", it.hasNext());
// just iterate through words
assertTrue("First element isn't correct", it.next().equals("Jerry"));
assertTrue("Second element isn't correct", it.next().equals("Tom"));
assertTrue("Nonexistent element is really null", it.next() == null);
// hasNext() should still have no side effects ...
assertFalse("There should be any more elements", it.hasNext());
assertFalse("There should be any more elements", it.hasNext());
assertFalse("There should be any more elements", it.hasNext());
// .. and there are really no more words
assertTrue("Nonexistent element is really null", it.next() == null);
assertTrue("Nonexistent element is really null", it.next() == null);
assertTrue("Nonexistent element is really null", it.next() == null);
assertEquals("First element isn't correct", "Jerry", it.next().utf8ToString());
assertEquals("Second element isn't correct", "Tom", it.next().utf8ToString());
assertNull("Nonexistent element is really null", it.next());
}
finally {
if (indexReader != null) { indexReader.close(); }
@ -176,15 +159,14 @@ public class TestLuceneDictionary extends LuceneTestCase {
public void testFieldZzz() throws IOException {
try {
indexReader = IndexReader.open(store);
indexReader = DirectoryReader.open(store);
ld = new LuceneDictionary(indexReader, "zzz");
it = ld.getWordsIterator();
assertTrue("First element doesn't exist.", it.hasNext());
assertTrue("First element isn't correct", it.next().equals("bar"));
assertFalse("More elements than expected", it.hasNext());
assertTrue("Nonexistent element is really null", it.next() == null);
assertNotNull("First element doesn't exist.", spare = it.next());
assertEquals("First element isn't correct", "bar", spare.utf8ToString());
assertNull("More elements than expected", it.next());
}
finally {
if (indexReader != null) { indexReader.close(); }
@ -194,7 +176,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
public void testSpellchecker() throws IOException {
Directory dir = newDirectory();
SpellChecker sc = new SpellChecker(dir);
indexReader = IndexReader.open(store);
indexReader = DirectoryReader.open(store);
sc.indexDictionary(new LuceneDictionary(indexReader, "contents"), newIndexWriterConfig(TEST_VERSION_CURRENT, null), false);
String[] suggestions = sc.suggestSimilar("Tam", 1);
assertEquals(1, suggestions.length);

View File

@ -191,7 +191,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
final List<String> input = new ArrayList<String>(benchmarkInput.size());
for (TermFreq tf : benchmarkInput) {
input.add(tf.term.substring(0, Math.min(tf.term.length(),
input.add(tf.term.utf8ToString().substring(0, Math.min(tf.term.length,
minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))));
}

View File

@ -75,11 +75,11 @@ public class PersistenceTest extends LuceneTestCase {
// Assert validity.
float previous = Float.NEGATIVE_INFINITY;
for (TermFreq k : keys) {
Float val = (Float) lookup.get(k.term);
assertNotNull(k.term, val);
Float val = (Float) lookup.get(k.term.utf8ToString());
assertNotNull(k.term.utf8ToString(), val);
if (supportsExactWeights) {
assertEquals(k.term, Float.valueOf(k.v), val);
assertEquals(k.term.utf8ToString(), Float.valueOf(k.v), val);
} else {
assertTrue(val + ">=" + previous, val >= previous);
previous = val.floatValue();

View File

@ -1,5 +1,7 @@
package org.apache.lucene.search.suggest;
import org.apache.lucene.util.BytesRef;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -18,10 +20,14 @@ package org.apache.lucene.search.suggest;
*/
public final class TermFreq {
public final String term;
public final BytesRef term;
public final float v;
public TermFreq(String term, float v) {
this(new BytesRef(term), v);
}
public TermFreq(BytesRef term, float v) {
this.term = term;
this.v = v;
}

View File

@ -17,10 +17,12 @@ package org.apache.lucene.search.suggest;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
/**
* A {@link TermFreqIterator} over a sequence of {@link TermFreq}s.
@ -28,6 +30,7 @@ import org.apache.lucene.search.spell.TermFreqIterator;
public final class TermFreqArrayIterator implements TermFreqIterator {
private final Iterator<TermFreq> i;
private TermFreq current;
private final BytesRef spare = new BytesRef();
public TermFreqArrayIterator(Iterator<TermFreq> i) {
this.i = i;
@ -44,14 +47,14 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
public float freq() {
return current.v;
}
public boolean hasNext() {
return i.hasNext();
}
public String next() {
return (current = i.next()).term;
}
public void remove() { throw new UnsupportedOperationException(); }
/**
 * Advances to the next {@link TermFreq} and returns its term.
 * The term bytes are copied into a single shared {@link BytesRef}, so the
 * returned instance is overwritten by the following call.
 *
 * @return the shared spare holding the current term, or {@code null} when
 *         the underlying iterator has no more entries
 */
@Override
public BytesRef next() throws IOException {
  if (!i.hasNext()) {
    return null;
  }
  current = i.next();
  spare.copyBytes(current.term);
  return spare;
}
}

View File

@ -0,0 +1,85 @@
package org.apache.lucene.search.suggest;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
 * Tests appending to, reading from, iterating over and sorting a
 * {@link BytesRefList}.
 */
public class TestBytesRefList extends LuceneTestCase {

  /**
   * Appends random strings and verifies that they can be read back in
   * insertion order, by random index, and through the list's iterator.
   */
  public void testAppend() throws IOException {
    BytesRefList list = new BytesRefList();
    List<String> stringList = new ArrayList<String>();
    int entries = atLeast(500);
    BytesRef spare = new BytesRef();
    for (int i = 0; i < entries; i++) {
      String randomRealisticUnicodeString = _TestUtil
          .randomRealisticUnicodeString(random);
      spare.copyChars(randomRealisticUnicodeString);
      list.append(spare);
      stringList.add(randomRealisticUnicodeString);
    }
    // check sequential access
    for (int i = 0; i < entries; i++) {
      assertNotNull(list.get(spare, i));
      assertEquals("entry " + i + " doesn't match", stringList.get(i),
          spare.utf8ToString());
    }

    // check random
    for (int i = 0; i < entries; i++) {
      int e = random.nextInt(entries);
      assertNotNull(list.get(spare, e));
      // Report the index that was actually checked, not the loop counter.
      assertEquals("entry " + e + " doesn't match", stringList.get(e),
          spare.utf8ToString());
    }
    // Iterate twice: each call to iterator() must start over from the first entry.
    for (int i = 0; i < 2; i++) {
      BytesRefIterator iterator = list.iterator();
      for (String string : stringList) {
        assertEquals(string, iterator.next().utf8ToString());
      }
      // An exhausted iterator signals the end by returning null.
      assertNull("iterator returned more entries than were appended", iterator.next());
    }
  }

  /**
   * Appends random strings, sorts the list and checks that the returned
   * ordinals enumerate the entries in the same order as
   * {@link Collections#sort(List)} applied to the strings.
   */
  public void testSort() {
    BytesRefList list = new BytesRefList();
    List<String> stringList = new ArrayList<String>();
    int entries = atLeast(500);
    BytesRef spare = new BytesRef();
    for (int i = 0; i < entries; i++) {
      String randomRealisticUnicodeString = _TestUtil.randomRealisticUnicodeString(random);
      spare.copyChars(randomRealisticUnicodeString);
      list.append(spare);
      stringList.add(randomRealisticUnicodeString);
    }
    // String's natural order compares UTF-16 code units, hence the
    // UTF8-sorted-as-UTF16 comparator for the byte side.
    Collections.sort(stringList);
    int[] sortedOrds = list.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
    for (int i = 0; i < entries; i++) {
      assertNotNull(list.get(spare, sortedOrds[i]));
      assertEquals("entry " + i + " doesn't match", stringList.get(i),
          spare.utf8ToString());
    }
  }
}

View File

@ -40,7 +40,7 @@ public class FSTCompletionTest extends LuceneTestCase {
FSTCompletionBuilder builder = new FSTCompletionBuilder();
for (TermFreq tf : evalKeys()) {
builder.add(new BytesRef(tf.term), (int) tf.v);
builder.add(tf.term, (int) tf.v);
}
completion = builder.build();
completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
@ -167,7 +167,7 @@ public class FSTCompletionTest extends LuceneTestCase {
// are.
Float previous = null;
for (TermFreq tf : keys) {
Float current = lookup.get(tf.term);
Float current = lookup.get(tf.term.utf8ToString());
if (previous != null) {
assertEquals(previous, current);
}
@ -183,8 +183,8 @@ public class FSTCompletionTest extends LuceneTestCase {
lookup.build(new TermFreqArrayIterator(input));
for (TermFreq tf : input) {
assertTrue("Not found: " + tf.term, lookup.get(tf.term) != null);
assertEquals(tf.term, lookup.lookup(tf.term, true, 1).get(0).key);
assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null);
assertEquals(tf.term, lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key);
}
List<LookupResult> result = lookup.lookup("wit", true, 5);
@ -211,7 +211,7 @@ public class FSTCompletionTest extends LuceneTestCase {
lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));
for (TermFreq tf : freqs) {
final String term = tf.term;
final String term = tf.term.utf8ToString();
for (int i = 1; i < term.length(); i++) {
String prefix = term.substring(0, i);
for (LookupResult lr : lookup.lookup(prefix, true, 10)) {