comp) {
final int[] orderdEntries = new int[size()];
for (int i = 0; i < orderdEntries.length; i++) {
orderdEntries[i] = i;
@@ -110,22 +142,65 @@ final class BytesRefList {
final int ord1 = orderdEntries[i], ord2 = orderdEntries[j];
return comp.compare(get(scratch1, ord1), get(scratch2, ord2));
}
-
+
@Override
protected void setPivot(int i) {
final int ord = orderdEntries[i];
get(pivot, ord);
}
-
+
@Override
protected int comparePivot(int j) {
final int ord = orderdEntries[j];
return comp.compare(pivot, get(scratch2, ord));
}
- private final BytesRef pivot = new BytesRef(),
- scratch1 = new BytesRef(), scratch2 = new BytesRef();
+ private final BytesRef pivot = new BytesRef(), scratch1 = new BytesRef(),
+ scratch2 = new BytesRef();
}.quickSort(0, size() - 1);
return orderdEntries;
}
+
+ /**
+ * sugar for {@link #iterator(Comparator)} with a <code>null</code> comparator
+ */
+ public BytesRefIterator iterator() {
+ return iterator(null);
+ }
+
+ /**
+ * <p>
+ * Returns a {@link BytesRefIterator} with point in time semantics. The
+ * iterator provides access to all so far appended {@link BytesRef} instances.
+ * </p>
+ * <p>
+ * If a non <code>null</code> {@link Comparator} is provided the iterator will
+ * iterate the byte values in the order specified by the comparator. Otherwise
+ * the order is the same as the values were appended.
+ * </p>
+ * <p>
+ * This is a non-destructive operation.
+ * </p>
+ */
+ public BytesRefIterator iterator(final Comparator comp) {
+ final BytesRef spare = new BytesRef();
+ final int size = size();
+ final int[] ords = comp == null ? null : sort(comp);
+ return new BytesRefIterator() {
+ int pos = 0;
+
+ @Override
+ public BytesRef next() throws IOException {
+ if (pos < size) {
+ return get(spare, ords == null ? pos++ : ords[pos++]);
+ }
+ return null;
+ }
+
+ @Override
+ public Comparator getComparator() {
+ return comp;
+ }
+ };
+ }
}
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
index 059e1c23601..49f9f762f72 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
@@ -75,7 +75,11 @@ public class FileDictionary implements Dictionary {
String[] fields = line.split("\t");
if (fields.length > 1) {
// keep reading floats for bw compat
- curFreq = (int)Float.parseFloat(fields[1]);
+ try {
+ curFreq = Long.parseLong(fields[1]);
+ } catch (NumberFormatException e) {
+ curFreq = (long)Double.parseDouble(fields[1]);
+ }
spare.copyChars(fields[0]);
} else {
spare.copyChars(line);
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
index eff3ee7b594..f6abab61e2f 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
@@ -29,15 +29,19 @@ import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
+/**
+ * Simple Lookup interface for {@link CharSequence} suggestions.
+ * @lucene.experimental
+ */
public abstract class Lookup {
/**
* Result of a lookup.
*/
public static final class LookupResult implements Comparable {
public final CharSequence key;
- public final float value;
+ public final long value;
- public LookupResult(CharSequence key, float value) {
+ public LookupResult(CharSequence key, long value) {
this.key = key;
this.value = value;
}
@@ -112,6 +116,10 @@ public abstract class Lookup {
build(tfit);
}
+ /**
+ * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}.
+ * The implementation might re-sort the data internally.
+ */
public abstract void build(TermFreqIterator tfit) throws IOException;
/**
@@ -124,22 +132,7 @@ public abstract class Lookup {
*/
public abstract List lookup(CharSequence key, boolean onlyMorePopular, int num);
- /**
- * Modify the lookup data by recording additional data. Optional operation.
- * @param key new lookup key
- * @param value value to associate with this key
- * @return true if new key is added, false if it already exists or operation
- * is not supported.
- */
- public abstract boolean add(CharSequence key, Object value);
- /**
- * Get value associated with a specific key.
- * @param key lookup key
- * @return associated value
- */
- public abstract Object get(CharSequence key);
-
/**
* Persist the constructed lookup data to a directory. Optional operation.
* @param output {@link OutputStream} to write the data to.
@@ -173,4 +166,5 @@ public abstract class Lookup {
* @throws IOException when fatal IO error occurs.
*/
public abstract boolean load(File storeDir) throws IOException;
+
}
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
index 2380724c9a6..020618148be 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
@@ -17,45 +17,166 @@ package org.apache.lucene.search.suggest;
* limitations under the License.
*/
+import java.io.File;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.suggest.fst.Sort;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
/**
- * This wrapper buffers incoming elements and makes sure they are sorted in
- * ascending lexicographic order.
+ * This wrapper buffers incoming elements and makes sure they are sorted based on given comparator.
+ * @lucene.experimental
*/
-public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
- // TODO keep this for now - but the consumer should really sort this stuff on disk with sorter...
- private final int[] sortedOrds;
- private int currentOrd = -1;
- private final BytesRef spare = new BytesRef();
- private final Comparator comp;
-
- public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator comp) throws IOException {
- super(source);
- this.sortedOrds = entries.sort(comp);
- this.comp = comp;
+public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
+
+ private final TermFreqIterator source;
+ private File tempInput;
+ private File tempSorted;
+ private final ByteSequencesReader reader;
+ private boolean done = false;
+
+ private long weight;
+ private final BytesRef scratch = new BytesRef();
+ private final Comparator comparator;
+
+ public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator comparator) throws IOException {
+ this(source, comparator, false);
}
-
- @Override
- public long weight() {
- return freqs[currentOrd];
+
+ public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator comparator, boolean compareRawBytes) throws IOException {
+ this.source = source;
+ this.comparator = comparator;
+ this.reader = sort(compareRawBytes ? comparator : new BytesOnlyComparator(this.comparator));
}
-
+
@Override
public BytesRef next() throws IOException {
- if (++curPos < entries.size()) {
- return entries.get(spare, (currentOrd = sortedOrds[curPos]));
+ boolean success = false;
+ if (done) {
+ return null;
+ }
+ try {
+ ByteArrayDataInput input = new ByteArrayDataInput();
+ if (reader.read(scratch)) {
+ weight = decode(scratch, input);
+ success = true;
+ return scratch;
+ }
+ close();
+ success = done = true;
+ return null;
+ } finally {
+ if (!success) {
+ done = true;
+ close();
+ }
}
- return null;
}
-
+
@Override
public Comparator getComparator() {
- return comp;
+ return comparator;
+ }
+
+ @Override
+ public long weight() {
+ return weight;
+ }
+
+ private Sort.ByteSequencesReader sort(Comparator comparator) throws IOException {
+ String prefix = getClass().getSimpleName();
+ File directory = Sort.defaultTempDir();
+ tempInput = File.createTempFile(prefix, ".input", directory);
+ tempSorted = File.createTempFile(prefix, ".sorted", directory);
+
+ final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
+ boolean success = false;
+ try {
+ BytesRef spare;
+ byte[] buffer = new byte[0];
+ ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+ while ((spare = source.next()) != null) {
+ encode(writer, output, buffer, spare, source.weight());
+ }
+ writer.close();
+ new Sort(comparator).sort(tempInput, tempSorted);
+ ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted);
+ success = true;
+ return reader;
+
+ } finally {
+ if (success) {
+ IOUtils.close(writer);
+ } else {
+ try {
+ IOUtils.closeWhileHandlingException(writer);
+ } finally {
+ close();
+ }
+ }
+
+ }
+ }
+
+ private void close() throws IOException {
+ if (tempInput != null) {
+ tempInput.delete();
+ }
+ if (tempSorted != null) {
+ tempSorted.delete();
+ }
+ IOUtils.close(reader);
+ }
+
+ private final static class BytesOnlyComparator implements Comparator {
+
+ final Comparator other;
+ private final BytesRef leftScratch = new BytesRef();
+ private final BytesRef rightScratch = new BytesRef();
+
+ public BytesOnlyComparator(Comparator other) {
+ this.other = other;
+ }
+
+ @Override
+ public int compare(BytesRef left, BytesRef right) {
+ wrap(leftScratch, left);
+ wrap(rightScratch, right);
+ return other.compare(leftScratch, rightScratch);
+ }
+
+ private void wrap(BytesRef wrapper, BytesRef source) {
+ wrapper.bytes = source.bytes;
+ wrapper.offset = source.offset;
+ wrapper.length = source.length - 8;
+
+ }
+ }
+
+ /**
+ * Serializes one entry as the term bytes immediately followed by the weight (written with
+ * {@code writeLong}, accounted for as 8 bytes) and passes the result to {@code writer}.
+ *
+ * NOTE(review): when {@code buffer} is too small a larger array is obtained locally only; the
+ * caller's array is never replaced. {@code sort} passes the same zero-length array on every
+ * call, so the buffer appears to be re-grown for each entry - confirm whether reuse was intended.
+ *
+ * @param writer destination for the encoded entry
+ * @param output scratch output that is re-pointed at the working array on every call
+ * @param buffer working array; used as-is if strictly larger than the encoded entry
+ * @param spare the term bytes to encode
+ * @param weight the weight appended after the term bytes
+ * @throws IOException if writing to {@code writer} fails
+ */
+ protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
+ // term bytes plus the trailing 8-byte weight
+ final int required = spare.length + 8;
+ final byte[] target = required >= buffer.length ? ArrayUtil.grow(buffer, required) : buffer;
+ output.reset(target);
+ output.writeBytes(spare.bytes, spare.offset, spare.length);
+ output.writeLong(weight);
+ writer.write(target, 0, output.getPosition());
+ }
+
+ /**
+ * Splits an entry produced by {@code encode} back into term and weight: the last 8 bytes of
+ * {@code scratch} are read as the weight and {@code scratch.length} is shortened so that
+ * {@code scratch} covers only the term bytes afterwards.
+ *
+ * @param scratch the encoded entry; its length is reduced by 8 as a side effect
+ * @param tmpInput reusable input that is reset onto the bytes of {@code scratch}
+ * @return the weight stored in the trailing 8 bytes
+ */
+ protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+ // Reset with offset and length so an entry that does not start at index 0 is decoded correctly.
+ tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
+ tmpInput.skipBytes(scratch.length - 8); // skip the term bytes
+ scratch.length -= 8; // drop the trailing 8-byte weight
+ return tmpInput.readLong();
+ }
}
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
index 4a7e3d8d027..a97b170bdb4 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
@@ -26,6 +26,7 @@ import org.apache.lucene.util.BytesRef;
/**
* This wrapper buffers the incoming elements and makes sure they are in
* random order.
+ * @lucene.experimental
*/
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
// TODO keep this for now
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java
index c7f42cb812b..3d141023550 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java
@@ -18,13 +18,16 @@ package org.apache.lucene.search.suggest.fst;
*/
import java.io.IOException;
-import java.util.Iterator;
+import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
/**
* Collects {@link BytesRef} and then allows one to iterate over their sorted order. Implementations
- * of this interface will be called in a single-threaded scenario.
+ * of this interface will be called in a single-threaded scenario.
+ * @lucene.experimental
+ * @lucene.internal
*/
public interface BytesRefSorter {
/**
@@ -42,5 +45,7 @@ public interface BytesRefSorter {
*
* @throws IOException If an I/O exception occurs.
*/
- Iterator iterator() throws IOException;
+ BytesRefIterator iterator() throws IOException;
+
+ Comparator getComparator();
}
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
index a28d57f229e..77995c11843 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
@@ -18,59 +18,63 @@ package org.apache.lucene.search.suggest.fst;
*/
import java.io.*;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
+import java.util.Comparator;
import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.IOUtils;
/**
* Builds and iterates over sequences stored on disk.
+ * @lucene.experimental
+ * @lucene.internal
*/
public class ExternalRefSorter implements BytesRefSorter, Closeable {
private final Sort sort;
private Sort.ByteSequencesWriter writer;
private File input;
- private File sorted;
-
+ private File sorted;
+
/**
* Will buffer all sequences to a temporary file and then sort (all on-disk).
*/
public ExternalRefSorter(Sort sort) throws IOException {
this.sort = sort;
- this.input = File.createTempFile("RefSorter-", ".raw", Sort.defaultTempDir());
+ this.input = File.createTempFile("RefSorter-", ".raw",
+ Sort.defaultTempDir());
this.writer = new Sort.ByteSequencesWriter(input);
}
-
+
@Override
public void add(BytesRef utf8) throws IOException {
- if (writer == null)
- throw new IllegalStateException();
+ if (writer == null) throw new IllegalStateException();
writer.write(utf8);
}
-
+
+ /**
+ * On the first call closes the writer, sorts the buffered input file into a new temporary
+ * file and deletes the input file. Every call returns a new iterator that reads the sorted
+ * file and reports the comparator of the underlying {@link Sort}.
+ *
+ * @throws IOException if closing, sorting or opening the sorted file fails
+ */
@Override
- public Iterator iterator() throws IOException {
+ public BytesRefIterator iterator() throws IOException {
if (sorted == null) {
closeWriter();
-
- sorted = File.createTempFile("RefSorter-", ".sorted", Sort.defaultTempDir());
+
+ sorted = File.createTempFile("RefSorter-", ".sorted",
+ Sort.defaultTempDir());
sort.sort(input, sorted);
-
+
input.delete();
input = null;
}
-
- return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted));
+
+ return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted),
+ sort.getComparator());
}
-
+
private void closeWriter() throws IOException {
if (writer != null) {
writer.close();
writer = null;
}
}
-
+
/**
* Removes any written temporary files.
*/
@@ -83,40 +87,54 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
if (sorted != null) sorted.delete();
}
}
-
+
/**
* Iterate over byte refs in a file.
*/
- class ByteSequenceIterator implements Iterator {
- private ByteSequencesReader reader;
- private byte[] next;
-
- public ByteSequenceIterator(ByteSequencesReader reader) throws IOException {
+ class ByteSequenceIterator implements BytesRefIterator {
+ private final ByteSequencesReader reader;
+ private BytesRef scratch = new BytesRef();
+ private final Comparator comparator;
+
+ public ByteSequenceIterator(ByteSequencesReader reader,
+ Comparator comparator) {
this.reader = reader;
- this.next = reader.read();
- }
-
- @Override
- public boolean hasNext() {
- return next != null;
+ this.comparator = comparator;
}
@Override
- public BytesRef next() {
- if (next == null) throw new NoSuchElementException();
- BytesRef r = new BytesRef(next);
- try {
- next = reader.read();
- if (next == null) {
- reader.close();
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
+ public BytesRef next() throws IOException {
+ if (scratch == null) {
+ return null;
+ }
+ boolean success = false;
+ try {
+ byte[] next = reader.read();
+ if (next != null) {
+ scratch.bytes = next;
+ scratch.length = next.length;
+ scratch.offset = 0;
+ } else {
+ IOUtils.close(reader);
+ scratch = null;
+ }
+ success = true;
+ return scratch;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(reader);
+ }
}
- return r;
}
-
+
@Override
- public void remove() { throw new UnsupportedOperationException(); }
+ public Comparator getComparator() {
+ return comparator;
+ }
+ }
+
+ @Override
+ public Comparator getComparator() {
+ return sort.getComparator();
}
}
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
index 59fdc4cde75..9e49b1e2795 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
@@ -28,6 +28,7 @@ import org.apache.lucene.util.fst.FST.Arc;
* Finite state automata based implementation of "autocomplete" functionality.
*
* @see FSTCompletionBuilder
+ * @lucene.experimental
*/
// TODO: we could store exact weights as outputs from the FST (int4 encoded
@@ -159,10 +160,10 @@ public class FSTCompletion {
* @param utf8
* The sequence of utf8 bytes to follow.
*
- * @return Returns the bucket number of the match or <code>null</code> if no
+ * @return Returns the bucket number of the match or <code>-1</code> if no
* match was found.
*/
- private Integer getExactMatchStartingFromRootArc(
+ private int getExactMatchStartingFromRootArc(
int rootArcIndex, BytesRef utf8) {
// Get the UTF-8 bytes representation of the input key.
try {
@@ -186,7 +187,7 @@ public class FSTCompletion {
}
// No match.
- return null;
+ return -1;
}
/**
@@ -273,8 +274,8 @@ public class FSTCompletion {
// exact match, if requested.
if (exactFirst) {
if (!checkExistingAndReorder(res, key)) {
- Integer exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
- if (exactMatchBucket != null) {
+ int exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
+ if (exactMatchBucket != -1) {
// Insert as the first result and truncate at num.
while (res.size() >= num) {
res.remove(res.size() - 1);
@@ -385,10 +386,10 @@ public class FSTCompletion {
}
/**
- * Returns the bucket assigned to a given key (if found) or <code>null</code> if
+ * Returns the bucket assigned to a given key (if found) or <code>-1</code> if
* no exact match exists.
*/
- public Integer getBucket(CharSequence key) {
+ public int getBucket(CharSequence key) {
return getExactMatchStartingFromRootArc(0, new BytesRef(key));
}
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
index f82194d6c2b..ba4c5c7cf2f 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
@@ -19,9 +19,9 @@ package org.apache.lucene.search.suggest.fst;
import java.io.Closeable;
import java.io.IOException;
-import java.util.Iterator;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.*;
@@ -98,6 +98,7 @@ import org.apache.lucene.util.fst.*;
* change, requiring you to rebuild the FST suggest index.
*
* @see FSTCompletion
+ * @lucene.experimental
*/
public class FSTCompletionBuilder {
/**
@@ -143,10 +144,11 @@ public class FSTCompletionBuilder {
/**
* Creates an {@link FSTCompletion} with default options: 10 buckets, exact match
- * promoted to first position and {@link InMemorySorter}.
+ * promoted to first position and {@link InMemorySorter} with a comparator obtained from
+ * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
*/
public FSTCompletionBuilder() {
- this(DEFAULT_BUCKETS, new InMemorySorter(), Integer.MAX_VALUE);
+ this(DEFAULT_BUCKETS, new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()), Integer.MAX_VALUE);
}
/**
@@ -237,10 +239,12 @@ public class FSTCompletionBuilder {
shareMaxTailLength, outputs, null, false);
BytesRef scratch = new BytesRef();
+ BytesRef entry;
final IntsRef scratchIntsRef = new IntsRef();
int count = 0;
- for (Iterator i = sorter.iterator(); i.hasNext(); count++) {
- BytesRef entry = i.next();
+ BytesRefIterator iter = sorter.iterator();
+ while((entry = iter.next()) != null) {
+ count++;
if (scratch.compareTo(entry) != 0) {
builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
scratch.copyBytes(entry);
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
index aee2ea1c502..9bd0ce79170 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
@@ -59,6 +59,7 @@ import org.apache.lucene.util.fst.NoOutputs;
* use {@link FSTCompletion} directly or {@link TSTLookup}, for example.
*
* @see FSTCompletion
+ * @lucene.experimental
*/
public class FSTCompletionLookup extends Lookup {
/**
@@ -171,7 +172,7 @@ public class FSTCompletionLookup extends Lookup {
}
output.reset(buffer);
- output.writeInt(FloatMagic.toSortable(tfit.weight()));
+ output.writeInt(encodeWeight(tfit.weight()));
output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition());
}
@@ -188,13 +189,13 @@ public class FSTCompletionLookup extends Lookup {
reader = new Sort.ByteSequencesReader(tempSorted);
long line = 0;
int previousBucket = 0;
- float previousScore = 0;
+ int previousScore = 0;
ByteArrayDataInput input = new ByteArrayDataInput();
BytesRef tmp1 = new BytesRef();
BytesRef tmp2 = new BytesRef();
while (reader.read(tmp1)) {
input.reset(tmp1.bytes);
- float currentScore = FloatMagic.fromSortable(input.readInt());
+ int currentScore = input.readInt();
int bucket;
if (line > 0 && currentScore == previousScore) {
@@ -230,6 +231,14 @@ public class FSTCompletionLookup extends Lookup {
tempSorted.delete();
}
}
+
+ /** weight -> cost */
+ private static int encodeWeight(long value) {
+ if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
+ throw new UnsupportedOperationException("cannot encode value: " + value);
+ }
+ return (int)value;
+ }
@Override
public List lookup(CharSequence key, boolean higherWeightsFirst, int num) {
@@ -250,19 +259,9 @@ public class FSTCompletionLookup extends Lookup {
return results;
}
- @Override
- public boolean add(CharSequence key, Object value) {
- // Not supported.
- return false;
- }
-
- @Override
public Object get(CharSequence key) {
- Integer bucket = normalCompletion.getBucket(key);
- if (bucket == null)
- return null;
- else
- return (float) normalCompletion.getBucket(key) / normalCompletion.getBucketCount();
+ final int bucket = normalCompletion.getBucket(key);
+ return bucket == -1 ? null : Long.valueOf(bucket);
}
/**
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FloatMagic.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FloatMagic.java
deleted file mode 100644
index 16583566fa1..00000000000
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FloatMagic.java
+++ /dev/null
@@ -1,75 +0,0 @@
-package org.apache.lucene.search.suggest.fst;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.NumericUtils;
-
-/**
- * Converts normalized float representations ({@link Float#floatToIntBits(float)})
- * into integers that are directly sortable in int4 representation (or unsigned values or
- * after promoting to a long with higher 32-bits zeroed).
- */
-class FloatMagic {
- /**
- * Convert a float to a directly sortable unsigned integer. For sortable signed
- * integers, see {@link NumericUtils#floatToSortableInt(float)}.
- */
- public static int toSortable(float f) {
- return floatBitsToUnsignedOrdered(Float.floatToRawIntBits(f));
- }
-
- /**
- * Back from {@link #toSortable(float)} to float.
- */
- public static float fromSortable(int v) {
- return Float.intBitsToFloat(unsignedOrderedToFloatBits(v));
- }
-
- /**
- * Convert float bits to directly sortable bits.
- * Normalizes all NaNs to canonical form.
- */
- static int floatBitsToUnsignedOrdered(int v) {
- // Canonicalize NaN ranges. I assume this check will be faster here than
- // (v == v) == false on the FPU? We don't distinguish between different
- // flavors of NaNs here (see http://en.wikipedia.org/wiki/NaN). I guess
- // in Java this doesn't matter much anyway.
- if ((v & 0x7fffffff) > 0x7f800000) {
- // Apply the logic below to a canonical "quiet NaN"
- return 0x7fc00000 ^ 0x80000000;
- }
-
- if (v < 0) {
- // Reverse the order of negative values and push them before positive values.
- return ~v;
- } else {
- // Shift positive values after negative, but before NaNs, they're sorted already.
- return v ^ 0x80000000;
- }
- }
-
- /**
- * Back from {@link #floatBitsToUnsignedOrdered(int)}.
- */
- static int unsignedOrderedToFloatBits(int v) {
- if (v < 0)
- return v & ~0x80000000;
- else
- return ~v;
- }
-}
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java
index 1e293530a46..ce6a17d721f 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java
@@ -17,29 +17,40 @@ package org.apache.lucene.search.suggest.fst;
* limitations under the License.
*/
-import java.util.*;
+import java.util.Comparator;
+import org.apache.lucene.search.suggest.BytesRefList;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
/**
* An {@link BytesRefSorter} that keeps all the entries in memory.
+ * @lucene.experimental
+ * @lucene.internal
*/
public final class InMemorySorter implements BytesRefSorter {
- // TODO: use a single byte[] to back up all entries?
- private final ArrayList refs = new ArrayList();
-
+ private final BytesRefList buffer = new BytesRefList();
private boolean closed = false;
+ private final Comparator comparator;
+ public InMemorySorter(Comparator comparator) {
+ this.comparator = comparator;
+ }
+
@Override
public void add(BytesRef utf8) {
if (closed) throw new IllegalStateException();
- refs.add(BytesRef.deepCopyOf(utf8));
+ buffer.append(utf8);
}
@Override
- public Iterator iterator() {
+ public BytesRefIterator iterator() {
closed = true;
- Collections.sort(refs, BytesRef.getUTF8SortedAsUnicodeComparator());
- return Collections.unmodifiableCollection(refs).iterator();
+ return buffer.iterator(comparator);
+ }
+
+ @Override
+ public Comparator getComparator() {
+ return comparator;
}
}
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
index 47942ed2d9e..8d9e5e3724a 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
@@ -20,15 +20,10 @@ package org.apache.lucene.search.suggest.fst;
import java.io.*;
import java.util.*;
+import org.apache.lucene.search.suggest.BytesRefList;
import org.apache.lucene.util.*;
import org.apache.lucene.util.PriorityQueue;
-// TODO: the buffer is currently byte[][] which with very small arrays will terribly overallocate
-// memory (alignments) and make GC very happy.
-//
-// We could move it to a single byte[] + and use custom sorting, but we'd need to check if this
-// yields any improvement first.
-
/**
* On-disk sorting of byte arrays. Each byte array (entry) is a composed of the following
* fields:
@@ -38,6 +33,8 @@ import org.apache.lucene.util.PriorityQueue;
*
*
* @see #sort(File, File)
+ * @lucene.experimental
+ * @lucene.internal
*/
public final class Sort {
public final static int MB = 1024 * 1024;
@@ -59,11 +56,6 @@ public final class Sort {
*/
public final static int MAX_TEMPFILES = 128;
- /**
- * Minimum slot buffer expansion.
- */
- private final static int MIN_EXPECTED_GROWTH = 1000;
-
/**
* A bit more descriptive unit for constructors.
*
@@ -111,21 +103,6 @@ public final class Sort {
}
}
- /**
- * byte[] in unsigned byte order.
- */
- static final Comparator unsignedByteOrderComparator = new Comparator() {
- public int compare(byte[] left, byte[] right) {
- final int max = Math.min(left.length, right.length);
- for (int i = 0, j = 0; i < max; i++, j++) {
- int diff = (left[i] & 0xff) - (right[j] & 0xff);
- if (diff != 0)
- return diff;
- }
- return left.length - right.length;
- }
- };
-
/**
* Sort info (debugging mostly).
*/
@@ -149,14 +126,15 @@ public final class Sort {
}
}
- private final static byte [][] EMPTY = new byte [0][];
-
private final BufferSize ramBufferSize;
private final File tempDirectory;
-
- private byte [][] buffer = new byte [0][];
+
+ private final BytesRefList buffer = new BytesRefList();
private SortInfo sortInfo;
private int maxTempFiles;
+ private final Comparator comparator;
+
+ public static final Comparator DEFAULT_COMPARATOR = BytesRef.getUTF8SortedAsUnicodeComparator();
/**
* Defaults constructor.
@@ -165,13 +143,17 @@ public final class Sort {
* @see BufferSize#automatic()
*/
public Sort() throws IOException {
- this(BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
+ this(DEFAULT_COMPARATOR, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
+ }
+
+ public Sort(Comparator comparator) throws IOException {
+ this(comparator, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
}
/**
* All-details constructor.
*/
- public Sort(BufferSize ramBufferSize, File tempDirectory, int maxTempfiles) {
+ public Sort(Comparator comparator, BufferSize ramBufferSize, File tempDirectory, int maxTempfiles) {
if (ramBufferSize.bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE) {
throw new IllegalArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.bytes);
}
@@ -183,6 +165,7 @@ public final class Sort {
this.ramBufferSize = ramBufferSize;
this.tempDirectory = tempDirectory;
this.maxTempFiles = maxTempfiles;
+ this.comparator = comparator;
}
/**
@@ -283,23 +266,25 @@ public final class Sort {
/** Sort a single partition in-memory. */
protected File sortPartition(int len) throws IOException {
- byte [][] data = this.buffer;
+ BytesRefList data = this.buffer;
File tempFile = File.createTempFile("sort", "partition", tempDirectory);
long start = System.currentTimeMillis();
- Arrays.sort(data, 0, len, unsignedByteOrderComparator);
+ // BytesRefList#iterator(Comparator) sorts eagerly, so create it here to keep sortTime meaningful.
+ final BytesRefIterator iter = data.iterator(comparator);
sortInfo.sortTime += (System.currentTimeMillis() - start);
- ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
+ final ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
+ BytesRef spare;
try {
- for (int i = 0; i < len; i++) {
- assert data[i].length <= Short.MAX_VALUE;
- out.write(data[i]);
+ while ((spare = iter.next()) != null) {
+ assert spare.length <= Short.MAX_VALUE;
+ out.write(spare);
}
out.close();
// Clean up the buffer for the next partition.
- this.buffer = EMPTY;
+ data.clear();
return tempFile;
} finally {
IOUtils.close(out);
@@ -314,7 +299,7 @@ public final class Sort {
PriorityQueue queue = new PriorityQueue(merges.size()) {
protected boolean lessThan(FileAndTop a, FileAndTop b) {
- return a.current.compareTo(b.current) < 0;
+ return comparator.compare(a.current, b.current) < 0;
}
};
@@ -359,33 +344,18 @@ public final class Sort {
/** Read in a single partition of data */
int readPartition(ByteSequencesReader reader) throws IOException {
long start = System.currentTimeMillis();
-
- // We will be reallocating from scratch.
- Arrays.fill(this.buffer, null);
-
- int bytesLimit = this.ramBufferSize.bytes;
- byte [][] data = this.buffer;
- byte[] line;
- int linesRead = 0;
- while ((line = reader.read()) != null) {
- if (linesRead + 1 >= data.length) {
- data = Arrays.copyOf(data,
- ArrayUtil.oversize(linesRead + MIN_EXPECTED_GROWTH,
- RamUsageEstimator.NUM_BYTES_OBJECT_REF));
- }
- data[linesRead++] = line;
-
+ final BytesRef scratch = new BytesRef();
+ while ((scratch.bytes = reader.read()) != null) {
+ scratch.length = scratch.bytes.length;
+ buffer.append(scratch);
// Account for the created objects.
// (buffer slots do not account to buffer size.)
- bytesLimit -= line.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
- if (bytesLimit < 0) {
+ if (ramBufferSize.bytes < buffer.bytesUsed()) {
break;
}
}
- this.buffer = data;
-
sortInfo.readTime += (System.currentTimeMillis() - start);
- return linesRead;
+ return buffer.size();
}
static class FileAndTop {
@@ -515,5 +485,9 @@ public final class Sort {
((Closeable) is).close();
}
}
+ }
+
+ public Comparator getComparator() {
+ return comparator;
}
}
\ No newline at end of file
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
index f5f37c36c8a..330cf3c82a4 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
@@ -28,6 +28,8 @@ import java.util.List;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.InputStreamDataInput;
@@ -102,72 +104,27 @@ public class WFSTCompletionLookup extends Lookup {
@Override
public void build(TermFreqIterator iterator) throws IOException {
- String prefix = getClass().getSimpleName();
- File directory = Sort.defaultTempDir();
- File tempInput = File.createTempFile(prefix, ".input", directory);
- File tempSorted = File.createTempFile(prefix, ".sorted", directory);
-
- Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
- Sort.ByteSequencesReader reader = null;
BytesRef scratch = new BytesRef();
-
- boolean success = false;
- try {
- byte [] buffer = new byte [0];
- ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
- BytesRef spare;
- while ((spare = iterator.next()) != null) {
- if (spare.length + 5 >= buffer.length) {
- buffer = ArrayUtil.grow(buffer, spare.length + 5);
- }
-
- output.reset(buffer);
- output.writeBytes(spare.bytes, spare.offset, spare.length);
- output.writeByte((byte)0); // separator: not used, just for sort order
- output.writeInt((int)encodeWeight(iterator.weight()));
- writer.write(buffer, 0, output.getPosition());
- }
- writer.close();
- new Sort().sort(tempInput, tempSorted);
- reader = new Sort.ByteSequencesReader(tempSorted);
+ TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator,
+ BytesRef.getUTF8SortedAsUnicodeComparator());
+ IntsRef scratchInts = new IntsRef();
+ BytesRef previous = null;
+ PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs);
+ while ((scratch = iter.next()) != null) {
+ long cost = iter.weight();
- PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
- Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs);
-
- BytesRef previous = null;
- BytesRef suggestion = new BytesRef();
- IntsRef scratchInts = new IntsRef();
- ByteArrayDataInput input = new ByteArrayDataInput();
- while (reader.read(scratch)) {
- suggestion.bytes = scratch.bytes;
- suggestion.offset = scratch.offset;
- suggestion.length = scratch.length - 5; // int + separator
-
- input.reset(scratch.bytes);
- input.skipBytes(suggestion.length + 1); // suggestion + separator
- long cost = input.readInt();
-
- if (previous == null) {
- previous = new BytesRef();
- } else if (suggestion.equals(previous)) {
- continue; // for duplicate suggestions, the best weight is actually added
- }
- Util.toIntsRef(suggestion, scratchInts);
- builder.add(scratchInts, cost);
- previous.copyBytes(suggestion);
+ if (previous == null) {
+ previous = new BytesRef();
+ } else if (scratch.equals(previous)) {
+ continue; // for duplicate suggestions, the best weight is actually
+ // added
}
- fst = builder.finish();
- success = true;
- } finally {
- if (success) {
- IOUtils.close(reader, writer);
- } else {
- IOUtils.closeWhileHandlingException(reader, writer);
- }
-
- tempInput.delete();
- tempSorted.delete();
+ Util.toIntsRef(scratch, scratchInts);
+ builder.add(scratchInts, cost);
+ previous.copyBytes(scratch);
}
+ fst = builder.finish();
}
@Override
@@ -270,16 +227,10 @@ public class WFSTCompletionLookup extends Lookup {
return output;
}
- @Override
- public boolean add(CharSequence key, Object value) {
- return false; // Not supported.
- }
-
/**
* Returns the weight associated with an input string,
* or null if it does not exist.
*/
- @Override
public Object get(CharSequence key) {
Arc arc = new Arc();
Long result = null;
@@ -289,23 +240,51 @@ public class WFSTCompletionLookup extends Lookup {
if (result == null || !arc.isFinal()) {
return null;
} else {
- return decodeWeight(result + arc.nextFinalOutput);
+ return Integer.valueOf(decodeWeight(result + arc.nextFinalOutput));
}
}
/** cost -> weight */
- private static float decodeWeight(long encoded) {
- return Integer.MAX_VALUE - encoded;
+ private static int decodeWeight(long encoded) {
+ return (int)(Integer.MAX_VALUE - encoded);
}
/** weight -> cost */
- private static long encodeWeight(float value) {
- if (Float.isNaN(value) || Float.isInfinite(value) || value < 0 || value > Integer.MAX_VALUE) {
+ private static int encodeWeight(long value) {
+ if (value < 0 || value > Integer.MAX_VALUE) {
throw new UnsupportedOperationException("cannot encode value: " + value);
}
return Integer.MAX_VALUE - (int)value;
}
+ private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqIteratorWrapper {
+
+ WFSTTermFreqIteratorWrapper(TermFreqIterator source,
+ Comparator comparator) throws IOException {
+ super(source, comparator, true);
+ }
+
+ @Override
+ protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
+ if (spare.length + 5 >= buffer.length) {
+ buffer = ArrayUtil.grow(buffer, spare.length + 5);
+ }
+ output.reset(buffer);
+ output.writeBytes(spare.bytes, spare.offset, spare.length);
+ output.writeByte((byte)0); // separator: not used, just for sort order
+ output.writeInt(encodeWeight(weight));
+ writer.write(buffer, 0, output.getPosition());
+ }
+
+ @Override
+ protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+ tmpInput.reset(scratch.bytes);
+ tmpInput.skipBytes(scratch.length - 4); // suggestion + separator
+ scratch.length -= 5; // sep + int
+ return tmpInput.readInt();
+ }
+ }
+
static final Comparator weightComparator = new Comparator () {
public int compare(Long left, Long right) {
return left.compareTo(right);
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
index 56a47514506..b7bb15e8a46 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
@@ -55,24 +55,22 @@ public class JaspellLookup extends Lookup {
final CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) {
- float freq = tfit.weight();
+ final long weight = tfit.weight();
if (spare.length == 0) {
continue;
}
charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
- trie.put(charsSpare.toString(), new Float(freq));
+ trie.put(charsSpare.toString(), Long.valueOf(weight));
}
}
- @Override
public boolean add(CharSequence key, Object value) {
trie.put(key, value);
// XXX
return false;
}
- @Override
public Object get(CharSequence key) {
return trie.get(key);
}
@@ -95,7 +93,7 @@ public class JaspellLookup extends Lookup {
if (onlyMorePopular) {
LookupPriorityQueue queue = new LookupPriorityQueue(num);
for (String s : list) {
- float freq = (Float)trie.get(s);
+ long freq = ((Number)trie.get(s)).longValue();
queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
}
for (LookupResult lr : queue.getResults()) {
@@ -104,7 +102,7 @@ public class JaspellLookup extends Lookup {
} else {
for (int i = 0; i < maxCnt; i++) {
String s = list.get(i);
- float freq = (Float)trie.get(s);
+ long freq = ((Number)trie.get(s)).longValue();
res.add(new LookupResult(new CharsRef(s), freq));
}
}
@@ -131,7 +129,7 @@ public class JaspellLookup extends Lookup {
node.splitchar = in.readChar();
byte mask = in.readByte();
if ((mask & HAS_VALUE) != 0) {
- node.data = new Float(in.readFloat());
+ node.data = Long.valueOf(in.readLong());
}
if ((mask & LO_KID) != 0) {
TSTNode kid = trie.new TSTNode('\0', node);
@@ -171,7 +169,7 @@ public class JaspellLookup extends Lookup {
if (node.data != null) mask |= HAS_VALUE;
out.writeByte(mask);
if (node.data != null) {
- out.writeFloat((Float)node.data);
+ out.writeLong(((Number)node.data).longValue());
}
writeRecursively(out, node.relatives[TSTNode.LOKID]);
writeRecursively(out, node.relatives[TSTNode.EQKID]);
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
index 56b00a3ca6a..99e4e6a8c46 100644
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
@@ -50,26 +50,24 @@ public class TSTLookup extends Lookup {
}
ArrayList tokens = new ArrayList();
- ArrayList vals = new ArrayList();
+ ArrayList vals = new ArrayList();
BytesRef spare;
CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) {
charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
tokens.add(charsSpare.toString());
- vals.add(new Float(tfit.weight()));
+ vals.add(Long.valueOf(tfit.weight()));
}
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
}
- @Override
public boolean add(CharSequence key, Object value) {
autocomplete.insert(root, key, value, 0);
// XXX we don't know if a new node was created
return true;
}
- @Override
public Object get(CharSequence key) {
List list = autocomplete.prefixCompletion(root, key, 0);
if (list == null || list.isEmpty()) {
@@ -107,7 +105,7 @@ public class TSTLookup extends Lookup {
if (onlyMorePopular) {
LookupPriorityQueue queue = new LookupPriorityQueue(num);
for (TernaryTreeNode ttn : list) {
- queue.insertWithOverflow(new LookupResult(ttn.token, (Float)ttn.val));
+ queue.insertWithOverflow(new LookupResult(ttn.token, ((Number)ttn.val).longValue()));
}
for (LookupResult lr : queue.getResults()) {
res.add(lr);
@@ -115,7 +113,7 @@ public class TSTLookup extends Lookup {
} else {
for (int i = 0; i < maxCnt; i++) {
TernaryTreeNode ttn = list.get(i);
- res.add(new LookupResult(ttn.token, (Float)ttn.val));
+ res.add(new LookupResult(ttn.token, ((Number)ttn.val).longValue()));
}
}
return res;
@@ -146,7 +144,7 @@ public class TSTLookup extends Lookup {
node.token = in.readUTF();
}
if ((mask & HAS_VALUE) != 0) {
- node.val = new Float(in.readFloat());
+ node.val = Long.valueOf(in.readLong());
}
if ((mask & LO_KID) != 0) {
node.loKid = new TernaryTreeNode();
@@ -184,7 +182,7 @@ public class TSTLookup extends Lookup {
if (node.val != null) mask |= HAS_VALUE;
out.writeByte(mask);
if (node.token != null) out.writeUTF(node.token);
- if (node.val != null) out.writeFloat((Float)node.val);
+ if (node.val != null) out.writeLong(((Number)node.val).longValue());
// recurse and write kids
if (node.loKid != null) {
writeRecursively(out, node.loKid);
diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
index a2deec4d6c9..73f5ae82dad 100644
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
@@ -17,8 +17,10 @@
package org.apache.lucene.search.suggest;
import java.io.File;
+import java.util.List;
import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup;
@@ -74,16 +76,18 @@ public class PersistenceTest extends LuceneTestCase {
lookup.load(storeDir);
// Assert validity.
- float previous = Float.NEGATIVE_INFINITY;
+ long previous = Long.MIN_VALUE;
for (TermFreq k : keys) {
- Float val = (Float) lookup.get(_TestUtil.bytesToCharSequence(k.term, random));
- assertNotNull(k.term.utf8ToString(), val);
+ List list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1);
+ assertEquals(1, list.size());
+ LookupResult lookupResult = list.get(0);
+ assertNotNull(k.term.utf8ToString(), lookupResult.key);
if (supportsExactWeights) {
- assertEquals(k.term.utf8ToString(), Float.valueOf(k.v), val);
+ assertEquals(k.term.utf8ToString(), k.v, lookupResult.value);
} else {
- assertTrue(val + ">=" + previous, val >= previous);
- previous = val.floatValue();
+ assertTrue(lookupResult.value + ">=" + previous, lookupResult.value >= previous);
+ previous = lookupResult.value;
}
}
}
diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
index 81952818ce2..ca997fabc28 100644
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
@@ -29,59 +29,79 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestBytesRefList extends LuceneTestCase {
-
+
public void testAppend() throws IOException {
BytesRefList list = new BytesRefList();
List stringList = new ArrayList();
- int entries = atLeast(500);
- BytesRef spare = new BytesRef();
- for (int i = 0; i < entries; i++) {
- String randomRealisticUnicodeString = _TestUtil
- .randomRealisticUnicodeString(random);
- spare.copyChars(randomRealisticUnicodeString);
- list.append(spare);
- stringList.add(randomRealisticUnicodeString);
- }
- for (int i = 0; i < entries; i++) {
- assertNotNull(list.get(spare, i));
- assertEquals("entry " + i + " doesn't match", stringList.get(i),
- spare.utf8ToString());
- }
-
- // check random
- for (int i = 0; i < entries; i++) {
- int e = random.nextInt(entries);
- assertNotNull(list.get(spare, e));
- assertEquals("entry " + i + " doesn't match", stringList.get(e),
- spare.utf8ToString());
- }
- for (int i = 0; i < 2; i++) {
-
- BytesRefIterator iterator = list.iterator();
- for (String string : stringList) {
- assertEquals(string, iterator.next().utf8ToString());
+ for (int j = 0; j < 2; j++) {
+ if (j > 0 && random.nextBoolean()) {
+ list.clear();
+ stringList.clear();
+ }
+ int entries = atLeast(500);
+ BytesRef spare = new BytesRef();
+ for (int i = 0; i < entries; i++) {
+ String randomRealisticUnicodeString = _TestUtil
+ .randomRealisticUnicodeString(random);
+ spare.copyChars(randomRealisticUnicodeString);
+ list.append(spare);
+ stringList.add(randomRealisticUnicodeString);
+ }
+ for (int i = 0; i < entries; i++) {
+ assertNotNull(list.get(spare, i));
+ assertEquals("entry " + i + " doesn't match", stringList.get(i),
+ spare.utf8ToString());
+ }
+
+ // check random
+ for (int i = 0; i < entries; i++) {
+ int e = random.nextInt(entries);
+ assertNotNull(list.get(spare, e));
+ assertEquals("entry " + i + " doesn't match", stringList.get(e),
+ spare.utf8ToString());
+ }
+ for (int i = 0; i < 2; i++) {
+
+ BytesRefIterator iterator = list.iterator();
+ for (String string : stringList) {
+ assertEquals(string, iterator.next().utf8ToString());
+ }
}
}
}
-
- public void testSort() {
+
+ public void testSort() throws IOException {
BytesRefList list = new BytesRefList();
List stringList = new ArrayList();
- int entries = atLeast(500);
- BytesRef spare = new BytesRef();
- for (int i = 0; i < entries; i++) {
- String randomRealisticUnicodeString = _TestUtil.randomRealisticUnicodeString(random);
- spare.copyChars(randomRealisticUnicodeString);
- list.append(spare);
- stringList.add(randomRealisticUnicodeString);
- }
- Collections.sort(stringList);
- int[] sortedOrds = list.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
- for (int i = 0; i < entries; i++) {
- assertNotNull(list.get(spare, sortedOrds[i]));
- assertEquals("entry " + i + " doesn't match", stringList.get(i),
- spare.utf8ToString());
+
+ for (int j = 0; j < 2; j++) {
+ if (j > 0 && random.nextBoolean()) {
+ list.clear();
+ stringList.clear();
+ }
+ int entries = atLeast(500);
+ BytesRef spare = new BytesRef();
+ for (int i = 0; i < entries; i++) {
+ String randomRealisticUnicodeString = _TestUtil
+ .randomRealisticUnicodeString(random);
+ spare.copyChars(randomRealisticUnicodeString);
+ list.append(spare);
+ stringList.add(randomRealisticUnicodeString);
+ }
+
+ Collections.sort(stringList);
+ BytesRefIterator iter = list.iterator(BytesRef
+ .getUTF8SortedAsUTF16Comparator());
+ int i = 0;
+ while ((spare = iter.next()) != null) {
+ assertEquals("entry " + i + " doesn't match", stringList.get(i),
+ spare.utf8ToString());
+ i++;
+ }
+ assertNull(iter.next());
+ assertEquals(i, stringList.size());
}
}
+
}
diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
index 6e74bc20ec9..5638894b83d 100644
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
@@ -17,12 +17,16 @@ package org.apache.lucene.search.suggest;
* the License.
*/
+import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -38,7 +42,8 @@ public class TestTermFreqIterator extends LuceneTestCase {
public void testTerms() throws Exception {
int num = atLeast(10000);
- TreeMap sorted = new TreeMap();
+ Comparator comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator();
+ TreeMap sorted = new TreeMap(comparator);
TermFreq[] unsorted = new TermFreq[num];
for (int i = 0; i < num; i++) {
@@ -52,13 +57,13 @@ public class TestTermFreqIterator extends LuceneTestCase {
}
// test the sorted iterator wrapper
- TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator());
+ TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator);
Iterator> expected = sorted.entrySet().iterator();
while (expected.hasNext()) {
Map.Entry entry = expected.next();
assertEquals(entry.getKey(), wrapper.next());
- assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F);
+ assertEquals(entry.getValue().longValue(), wrapper.weight());
}
assertNull(wrapper.next());
@@ -72,4 +77,57 @@ public class TestTermFreqIterator extends LuceneTestCase {
}
assertEquals(sorted, actual);
}
+
+
+ public void testRaw() throws Exception {
+ int num = atLeast(10000);
+
+ Comparator comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+ BytesRefHash sorted = new BytesRefHash();
+ TermFreq[] unsorted = new TermFreq[num];
+ byte[] buffer = new byte[0];
+ ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+ for (int i = 0; i < num; i++) {
+ BytesRef spare;
+ long weight;
+ do {
+ spare = new BytesRef(_TestUtil.randomUnicodeString(random));
+ if (spare.length + 8 >= buffer.length) {
+ buffer = ArrayUtil.grow(buffer, spare.length + 8);
+ }
+ output.reset(buffer);
+ output.writeBytes(spare.bytes, spare.offset, spare.length);
+ weight = random.nextLong();
+ output.writeLong(weight);
+
+ } while (sorted.add(new BytesRef(buffer, 0, output.getPosition())) < 0);
+ unsorted[i] = new TermFreq(spare, weight);
+ }
+
+ // test the sorted iterator wrapper
+ TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator, true);
+ int[] sort = sorted.sort(comparator);
+ int size = sorted.size();
+ BytesRef spare = new BytesRef();
+ for (int i = 0; i < size; i++) {
+ sorted.get(sort[i], spare);
+ spare.length -= 8; // strip the trailing 8-byte long weight
+ assertEquals(spare, wrapper.next());
+ spare.offset = spare.offset + spare.length;
+ spare.length = 8;
+ assertEquals(asLong(spare), wrapper.weight());
+ }
+ assertNull(wrapper.next());
+ }
+
+ public static long asLong(BytesRef b) {
+ return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
+ b.offset + 4) & 0xFFFFFFFFL);
+ }
+
+ private static int asIntInternal(BytesRef b, int pos) {
+ return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
+ | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
+ }
}
diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
index cb62b2ae301..5c06670a3b2 100644
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
@@ -17,9 +17,8 @@ package org.apache.lucene.search.suggest.fst;
* limitations under the License.
*/
-import java.util.Iterator;
-
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
@@ -31,7 +30,7 @@ public class BytesRefSortersTest extends LuceneTestCase {
@Test
public void testInMemorySorter() throws Exception {
- check(new InMemorySorter());
+ check(new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()));
}
private void check(BytesRefSorter sorter) throws Exception {
@@ -42,8 +41,8 @@ public class BytesRefSortersTest extends LuceneTestCase {
}
// Create two iterators and check that they're aligned with each other.
- Iterator i1 = sorter.iterator();
- Iterator i2 = sorter.iterator();
+ BytesRefIterator i1 = sorter.iterator();
+ BytesRefIterator i2 = sorter.iterator();
// Verify sorter contract.
try {
@@ -52,10 +51,12 @@ public class BytesRefSortersTest extends LuceneTestCase {
} catch (IllegalStateException e) {
// Expected.
}
-
- while (i1.hasNext() && i2.hasNext()) {
- assertEquals(i1.next(), i2.next());
+ BytesRef spare1;
+ BytesRef spare2;
+ while ((spare1 = i1.next()) != null && (spare2 = i2.next()) != null) {
+ assertEquals(spare1, spare2);
}
- assertEquals(i1.hasNext(), i2.hasNext());
+ assertNull(i1.next());
+ assertNull(i2.next());
}
}
diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
index f97d6b5c1dc..339282e642b 100644
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
@@ -165,9 +165,9 @@ public class FSTCompletionTest extends LuceneTestCase {
// All the weights were constant, so all returned buckets must be constant, whatever they
// are.
- Float previous = null;
+ Long previous = null;
for (TermFreq tf : keys) {
- Float current = (Float)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random));
+ Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random))).longValue();
if (previous != null) {
assertEquals(previous, current);
}
@@ -181,7 +181,7 @@ public class FSTCompletionTest extends LuceneTestCase {
FSTCompletionLookup lookup = new FSTCompletionLookup();
lookup.build(new TermFreqArrayIterator(input));
for (TermFreq tf : input) {
- assertTrue("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)) != null);
+ assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)));
assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random), true, 1).get(0).key.toString());
}
diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FloatMagicTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FloatMagicTest.java
deleted file mode 100644
index 2129142aabd..00000000000
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FloatMagicTest.java
+++ /dev/null
@@ -1,140 +0,0 @@
-package org.apache.lucene.search.suggest.fst;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.*;
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NumericUtils;
-import org.junit.Ignore;
-import org.junit.Test;
-
-public class FloatMagicTest extends LuceneTestCase {
- public void testFloatMagic() {
- ArrayList floats = new ArrayList(Arrays.asList(
- Float.intBitsToFloat(0x7f800001), // NaN (invalid combination).
- Float.intBitsToFloat(0x7fffffff), // NaN (invalid combination).
- Float.intBitsToFloat(0xff800001), // NaN (invalid combination).
- Float.intBitsToFloat(0xffffffff), // NaN (invalid combination).
- Float.POSITIVE_INFINITY,
- Float.MAX_VALUE,
- 100f,
- 0f,
- 0.1f,
- Float.MIN_VALUE,
- Float.NaN,
- -0.0f,
- -Float.MIN_VALUE,
- -0.1f,
- -1f,
- -10f,
- Float.NEGATIVE_INFINITY));
-
- // Sort them using juc.
- Collections.sort(floats);
-
- // Convert to sortable int4 representation (as long to have an unsigned sort).
- long [] int4 = new long [floats.size()];
- for (int i = 0; i < floats.size(); i++) {
- int4[i] = FloatMagic.toSortable(floats.get(i)) & 0xffffffffL;
-
- /*
- System.out.println(
- String.format("raw %8s sortable %8s %8s numutils %8s %s",
- Integer.toHexString(Float.floatToRawIntBits(floats.get(i))),
- Integer.toHexString(FloatMagic.toSortable(floats.get(i))),
- Integer.toHexString(FloatMagic.unsignedOrderedToFloatBits(FloatMagic.toSortable(floats.get(i)))),
- Integer.toHexString(NumericUtils.floatToSortableInt(floats.get(i))),
- floats.get(i)));
- */
- }
-
- // Sort and compare. Should be identical order.
- Arrays.sort(int4);
- ArrayList backFromFixed = new ArrayList();
- for (int i = 0; i < int4.length; i++) {
- backFromFixed.add(FloatMagic.fromSortable((int) int4[i]));
- }
-
- /*
- for (int i = 0; i < int4.length; i++) {
- System.out.println(
- floats.get(i) + " " + FloatMagic.fromSortable((int) int4[i]));
- }
- */
-
- assertEquals(floats, backFromFixed);
- }
-
- @Ignore("Once checked, valid forever?") @Test
- public void testRoundTripFullRange() {
- int i = 0;
- do {
- float f = Float.intBitsToFloat(i);
- float f2 = FloatMagic.fromSortable(FloatMagic.toSortable(f));
-
- if (!((Float.isNaN(f) && Float.isNaN(f2)) || f == f2)) {
- throw new RuntimeException("! " + Integer.toHexString(i) + "> " + f + " " + f2);
- }
-
- if ((i & 0xffffff) == 0) {
- System.out.println(Integer.toHexString(i));
- }
-
- i++;
- } while (i != 0);
- }
-
- @Ignore("Once checked, valid forever?") @Test
- public void testIncreasingFullRange() {
- // -infinity ... -0.0
- for (int i = 0xff800000; i != 0x80000000; i--) {
- checkSmaller(i, i - 1);
- }
-
- // -0.0 +0.0
- checkSmaller(0x80000000, 0);
-
- // +0.0 ... +infinity
- for (int i = 0; i != 0x7f800000; i++) {
- checkSmaller(i, i + 1);
- }
-
- // All other are NaNs and should be after positive infinity.
- final long infinity = toSortableL(Float.POSITIVE_INFINITY);
- for (int i = 0x7f800001; i != 0x7fffffff; i++) {
- assertTrue(infinity < toSortableL(Float.intBitsToFloat(i)));
- }
- for (int i = 0xff800001; i != 0xffffffff; i++) {
- assertTrue(infinity < toSortableL(Float.intBitsToFloat(i)));
- }
- }
-
- private long toSortableL(float f) {
- return FloatMagic.toSortable(f) & 0xffffffffL;
- }
-
- private void checkSmaller(int i1, int i2) {
- float f1 = Float.intBitsToFloat(i1);
- float f2 = Float.intBitsToFloat(i2);
- if (f1 > f2) {
- throw new AssertionError(f1 + " " + f2 + " " + i1 + " " + i2);
- }
- assertTrue(toSortableL(f1) < toSortableL(f2));
- }
-}
diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java
index f4f985328ca..3a7937c8ac9 100644
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java
@@ -20,6 +20,7 @@ package org.apache.lucene.search.suggest.fst;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Comparator;
import org.apache.lucene.search.suggest.fst.Sort.BufferSize;
import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
@@ -61,7 +62,7 @@ public class TestSort extends LuceneTestCase {
@Test
public void testIntermediateMerges() throws Exception {
// Sort 20 mb worth of data with 1mb buffer, binary merging.
- SortInfo info = checkSort(new Sort(BufferSize.megabytes(1), Sort.defaultTempDir(), 2),
+ SortInfo info = checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(1), Sort.defaultTempDir(), 2),
generateRandom(Sort.MB * 20));
assertTrue(info.mergeRounds > 10);
}
@@ -69,7 +70,7 @@ public class TestSort extends LuceneTestCase {
@Test
public void testSmallRandom() throws Exception {
// Sort 20 mb worth of data with 1mb buffer.
- SortInfo sortInfo = checkSort(new Sort(BufferSize.megabytes(1), Sort.defaultTempDir(), Sort.MAX_TEMPFILES),
+ SortInfo sortInfo = checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(1), Sort.defaultTempDir(), Sort.MAX_TEMPFILES),
generateRandom(Sort.MB * 20));
assertEquals(1, sortInfo.mergeRounds);
}
@@ -77,7 +78,7 @@ public class TestSort extends LuceneTestCase {
@Test @Nightly
public void testLargerRandom() throws Exception {
// Sort 100MB worth of data with 15mb buffer.
- checkSort(new Sort(BufferSize.megabytes(16), Sort.defaultTempDir(), Sort.MAX_TEMPFILES),
+ checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(16), Sort.defaultTempDir(), Sort.MAX_TEMPFILES),
generateRandom(Sort.MB * 100));
}
@@ -92,14 +93,25 @@ public class TestSort extends LuceneTestCase {
byte [][] bytes = data.toArray(new byte[data.size()][]);
return bytes;
}
-
+
+  /** Orders byte arrays lexicographically, reading each byte as unsigned (0..255); on a shared prefix the shorter array sorts first. */
+  static final Comparator<byte[]> unsignedByteOrderComparator = new Comparator<byte[]>() {
+    @Override public int compare(byte[] left, byte[] right) {
+      final int max = Math.min(left.length, right.length);
+      for (int i = 0; i < max; i++) {
+        final int diff = (left[i] & 0xff) - (right[i] & 0xff); // & 0xff undoes sign extension; diff stays within [-255, 255]
+        if (diff != 0) { return diff; }
+      }
+      return left.length - right.length; // lengths are non-negative, so the subtraction cannot overflow
+    }
+  };
/**
* Check sorting data on an instance of {@link Sort}.
*/
private SortInfo checkSort(Sort sort, byte[][] data) throws IOException {
File unsorted = writeAll("unsorted", data);
- Arrays.sort(data, Sort.unsignedByteOrderComparator);
+ Arrays.sort(data, unsignedByteOrderComparator);
File golden = writeAll("golden", data);
File sorted = new File(tempDir, "sorted");
diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
index 916eeb91557..6cadef3c379 100644
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
@@ -117,7 +117,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
// TODO: could be faster... but its slowCompletor for a reason
for (Map.Entry e : slowCompletor.entrySet()) {
if (e.getKey().startsWith(prefix)) {
- matches.add(new LookupResult(e.getKey(), (float)e.getValue().longValue()));
+ matches.add(new LookupResult(e.getKey(), e.getValue().longValue()));
}
}
diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
index 8f0cade9604..525ce3b97dc 100644
--- a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
@@ -153,11 +153,6 @@ public class Suggester extends SolrSpellChecker {
build(core, searcher);
}
- public void add(CharsRef query, int numHits) {
- LOG.info("add " + query + ", " + numHits);
- lookup.add(query, new Integer(numHits));
- }
-
static SpellingResult EMPTY_RESULT = new SpellingResult();
@Override
@@ -182,7 +177,7 @@ public class Suggester extends SolrSpellChecker {
Collections.sort(suggestions);
}
for (LookupResult lr : suggestions) {
- res.add(t, lr.key.toString(), ((Number)lr.value).intValue());
+ res.add(t, lr.key.toString(), (int)lr.value);
}
}
return res;