git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1388295 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-21 01:53:14 +00:00
parent c17b6776b5
commit 70599e2dfc
11 changed files with 60 additions and 4 deletions

View File

@ -88,6 +88,9 @@ public class DirectSpellChecker {
/** the string distance to use */
private StringDistance distance = INTERNAL_LEVENSHTEIN;
/** Creates a DirectSpellChecker with default configuration values */
public DirectSpellChecker() {}
/** Get the maximum number of Levenshtein edit-distances to draw
* candidate terms from. */
public int getMaxEdits() {

View File

@ -187,6 +187,10 @@ public class SpellChecker implements java.io.Closeable {
this.comparator = comparator;
}
/**
* Gets the comparator in use for ranking suggestions.
* @see #setComparator(Comparator)
*/
public Comparator<SuggestWord> getComparator() {
return comparator;
}

View File

@ -28,6 +28,7 @@ import org.apache.lucene.util.BytesRefIterator;
*/
public interface TermFreqIterator extends BytesRefIterator {
/** A term's weight, higher numbers mean better suggestions. */
public long weight();
/**

View File

@ -109,6 +109,12 @@ public abstract class Lookup {
}
}
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
*/
public Lookup() {}
/** Build lookup from a dictionary. Some implementations may require sorted
* or unsorted keys from the dictionary's iterator - use
* {@link SortedTermFreqIteratorWrapper} or

View File

@ -47,5 +47,8 @@ public interface BytesRefSorter {
*/
BytesRefIterator iterator() throws IOException;
/**
* Comparator used to determine the sort order of entries.
*/
Comparator<BytesRef> getComparator();
}

View File

@ -42,7 +42,9 @@ public class FSTCompletion {
* A single completion for a given key.
*/
public static final class Completion implements Comparable<Completion> {
/** UTF-8 bytes of the suggestion */
public final BytesRef utf8;
/** source bucket (weight) of the suggestion */
public final int bucket;
Completion(BytesRef key, int bucket) {
@ -95,6 +97,7 @@ public class FSTCompletion {
private boolean higherWeightsFirst;
/**
* Constructs an FSTCompletion, specifying higherWeightsFirst and exactFirst.
* @param automaton
* Automaton with completions. See {@link FSTCompletionBuilder}.
* @param higherWeightsFirst

View File

@ -152,6 +152,7 @@ public class FSTCompletionBuilder {
}
/**
* Creates an FSTCompletion with the specified options.
* @param buckets
* The number of buckets for weight discretization. Buckets are used
* in {@link #add(BytesRef, int)} and must be smaller than the number

View File

@ -249,6 +249,10 @@ public class FSTCompletionLookup extends Lookup {
return results;
}
/**
* Returns the bucket (weight) as a Long for the provided key if it exists,
* otherwise null if it does not.
*/
public Object get(CharSequence key) {
final int bucket = normalCompletion.getBucket(key);
return bucket == -1 ? null : Long.valueOf(bucket);

View File

@ -33,6 +33,10 @@ public final class InMemorySorter implements BytesRefSorter {
private boolean closed = false;
private final Comparator<BytesRef> comparator;
/**
* Creates an InMemorySorter, sorting entries by the
* provided comparator.
*/
public InMemorySorter(Comparator<BytesRef> comparator) {
this.comparator = comparator;
}

View File

@ -37,7 +37,9 @@ import org.apache.lucene.util.PriorityQueue;
* @lucene.internal
*/
public final class Sort {
/** Convenience constant for megabytes */
public final static long MB = 1024 * 1024;
/** Convenience constant for gigabytes */
public final static long GB = MB * 1024;
/**
@ -148,6 +150,7 @@ public final class Sort {
private int maxTempFiles;
private final Comparator<BytesRef> comparator;
/** Default comparator: sorts in binary (codepoint) order */
public static final Comparator<BytesRef> DEFAULT_COMPARATOR = BytesRef.getUTF8SortedAsUnicodeComparator();
/**
@ -160,6 +163,12 @@ public final class Sort {
this(DEFAULT_COMPARATOR, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
}
/**
* Defaults constructor with a custom comparator.
*
* @see #defaultTempDir()
* @see BufferSize#automatic()
*/
public Sort(Comparator<BytesRef> comparator) throws IOException {
this(comparator, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
}
@ -401,25 +410,41 @@ public final class Sort {
public static class ByteSequencesWriter implements Closeable {
private final DataOutput os;
/** Constructs a ByteSequencesWriter to the provided File */
public ByteSequencesWriter(File file) throws IOException {
this(new DataOutputStream(
new BufferedOutputStream(
new FileOutputStream(file))));
}
/** Constructs a ByteSequencesWriter to the provided DataOutput */
public ByteSequencesWriter(DataOutput os) {
this.os = os;
}
/**
* Writes a BytesRef.
* @see #write(byte[], int, int)
*/
public void write(BytesRef ref) throws IOException {
assert ref != null;
write(ref.bytes, ref.offset, ref.length);
}
/**
* Writes a byte array.
* @see #write(byte[], int, int)
*/
public void write(byte [] bytes) throws IOException {
write(bytes, 0, bytes.length);
}
/**
* Writes a byte array.
* <p>
* The length is written as a <code>short</code>, followed
* by the bytes.
*/
public void write(byte [] bytes, int off, int len) throws IOException {
assert bytes != null;
assert off >= 0 && off + len <= bytes.length;
@ -446,12 +471,14 @@ public final class Sort {
public static class ByteSequencesReader implements Closeable {
private final DataInput is;
/** Constructs a ByteSequencesReader from the provided File */
public ByteSequencesReader(File file) throws IOException {
this(new DataInputStream(
new BufferedInputStream(
new FileInputStream(file))));
}
/** Constructs a ByteSequencesReader from the provided DataInput */
public ByteSequencesReader(DataInput is) {
this.is = is;
}
@ -513,6 +540,7 @@ public final class Sort {
}
}
/** Returns the comparator in use to sort entries */
public Comparator<BytesRef> getComparator() {
return comparator;
}

View File

@ -52,10 +52,9 @@ import org.apache.lucene.util.fst.Util.MinResult;
* then walks the <i>n</i> shortest paths to retrieve top-ranked
* suggestions.
* <p>
* <b>NOTE</b>: Although the {@link TermFreqIterator} API specifies
* floating point weights, input weights should be whole numbers.
* Input weights will be cast to a java integer, and any
* negative, infinite, or NaN values will be rejected.
* <b>NOTE</b>:
* Input weights must be between 0 and {@link Integer#MAX_VALUE}, any
* other values will be rejected.
*
* @see Util#shortestPaths(FST, FST.Arc, Comparator, int)
* @lucene.experimental