git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1388507 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-21 14:09:02 +00:00
parent b8f9525142
commit d4aace1038
24 changed files with 188 additions and 10 deletions

View File

@ -257,7 +257,7 @@
<!-- queryparser: problems -->
<!-- sandbox: problems -->
<!-- spatial: problems -->
<!-- suggest: problems -->
<check-missing-javadocs dir="build/docs/suggest" level="method"/>
<!-- test-framework: problems -->
<!-- too much to fix core/ for now, but enforce full javadocs for key packages -->

View File

@ -30,6 +30,11 @@ public class CombineSuggestion {
*/
public final SuggestWord suggestion;
/**
* Creates a new CombineSuggestion from a <code>suggestion</code> and
* an array of term ids (referencing the indexes to the original terms that
* form this combined suggestion)
*/
public CombineSuggestion (SuggestWord suggestion, int[] originalTermIndexes) {
this.suggestion = suggestion;
this.originalTermIndexes = originalTermIndexes;

View File

@ -42,6 +42,13 @@ public class HighFrequencyDictionary implements Dictionary {
private String field;
private float thresh;
/**
* Creates a new Dictionary, pulling source terms from
* the specified <code>field</code> in the provided <code>reader</code>.
* <p>
* Terms appearing in less than <code>thresh</code> percentage of documents
* will be excluded.
*/
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
this.reader = reader;
this.field = field;

View File

@ -27,6 +27,13 @@ import java.util.Arrays;
public class JaroWinklerDistance implements StringDistance {
private float threshold = 0.7f;
/**
* Creates a new distance metric with the default threshold
* for the Jaro Winkler bonus (0.7)
* @see #setThreshold(float)
*/
public JaroWinklerDistance() {}
private int[] matches(String s1, String s2) {
String max, min;

View File

@ -27,14 +27,15 @@ import java.io.*;
/**
* Lucene Dictionary: terms taken from the given field
* of a Lucene index.
*
* When using IndexReader.terms(Term) the code must not call next() on TermEnum
* as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
*/
public class LuceneDictionary implements Dictionary {
private IndexReader reader;
private String field;
/**
* Creates a new Dictionary, pulling source terms from
* the specified <code>field</code> in the provided <code>reader</code>
*/
public LuceneDictionary(IndexReader reader, String field) {
this.reader = reader;
this.field = field;

View File

@ -38,6 +38,12 @@ import org.apache.lucene.util.IntsRef;
* for merging results from multiple DirectSpellCheckers.
*/
public final class LuceneLevenshteinDistance implements StringDistance {
/**
* Creates a new comparator, mimicking the behavior of Lucene's internal
* edit distance.
*/
public LuceneLevenshteinDistance() {}
@Override
public float getDistance(String target, String other) {

View File

@ -38,10 +38,20 @@ public class PlainTextDictionary implements Dictionary {
private BufferedReader in;
/**
* Creates a dictionary based on a File.
* <p>
* NOTE: content is treated as UTF-8
*/
public PlainTextDictionary(File file) throws IOException {
in = new BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8));
}
/**
* Creates a dictionary based on an input stream.
* <p>
* NOTE: content is treated as UTF-8
*/
public PlainTextDictionary(InputStream dictFile) {
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
}

View File

@ -27,6 +27,11 @@ package org.apache.lucene.search.spell;
*/
public final class SuggestWord{
/**
* Creates a new empty suggestion with null text.
*/
public SuggestWord() {}
/**
* the score of the word
*/

View File

@ -20,10 +20,16 @@ import java.util.Comparator;
/**
* Frequency first, then score. Must have
* Frequency first, then score.
*
**/
public class SuggestWordFrequencyComparator implements Comparator<SuggestWord> {
/**
* Creates a new comparator that will compare by {@link SuggestWord#freq},
* then by {@link SuggestWord#score}, then by {@link SuggestWord#string}.
*/
public SuggestWordFrequencyComparator() {}
public int compare(SuggestWord first, SuggestWord second) {
// first criterion: the frequency

View File

@ -31,6 +31,10 @@ import java.util.Comparator;
*
*/
public final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
/**
* Default comparator: score then frequency.
* @see SuggestWordScoreComparator
*/
public static final Comparator<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator();

View File

@ -24,6 +24,13 @@ import java.util.Comparator;
*
**/
public class SuggestWordScoreComparator implements Comparator<SuggestWord> {
/**
* Creates a new comparator that will compare by {@link SuggestWord#score},
* then by {@link SuggestWord#freq}, then by {@link SuggestWord#string}.
*/
public SuggestWordScoreComparator() {}
public int compare(SuggestWord first, SuggestWord second) {
// first criterion: the score
if (first.score > second.score) {

View File

@ -38,6 +38,10 @@ public interface TermFreqIterator extends BytesRefIterator {
public static class TermFreqIteratorWrapper implements TermFreqIterator {
private BytesRefIterator wrapped;
/**
* Creates a new wrapper, wrapping the specified iterator and
* specifying a weight value of <code>1</code> for all terms.
*/
public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
this.wrapped = wrapped;
}

View File

@ -40,8 +40,19 @@ public class WordBreakSpellChecker {
private int maxChanges = 1;
private int maxEvaluations = 1000;
/** Term that can be used to prohibit adjacent terms from being combined */
public static final Term SEPARATOR_TERM = new Term("", "");
/**
* Creates a new spellchecker with default configuration values
* @see #setMaxChanges(int)
* @see #setMaxCombineWordLength(int)
* @see #setMaxEvaluations(int)
* @see #setMinBreakWordLength(int)
* @see #setMinSuggestionFrequency(int)
*/
public WordBreakSpellChecker() {}
/**
* <p>
* Determines the order to list word break suggestions
@ -347,22 +358,43 @@ public class WordBreakSpellChecker {
return word;
}
/**
* Returns the minimum frequency a term must have
* to be part of a suggestion.
* @see #setMinSuggestionFrequency(int)
*/
public int getMinSuggestionFrequency() {
return minSuggestionFrequency;
}
/**
* Returns the maximum length of a combined suggestion
* @see #setMaxCombineWordLength(int)
*/
public int getMaxCombineWordLength() {
return maxCombineWordLength;
}
/**
* Returns the minimum size of a broken word
* @see #setMinBreakWordLength(int)
*/
public int getMinBreakWordLength() {
return minBreakWordLength;
}
/**
* Returns the maximum number of changes to perform on the input
* @see #setMaxChanges(int)
*/
public int getMaxChanges() {
return maxChanges;
}
/**
* Returns the maximum number of word combinations to evaluate.
* @see #setMaxEvaluations(int)
*/
public int getMaxEvaluations() {
return maxEvaluations;
}
@ -374,7 +406,7 @@ public class WordBreakSpellChecker {
* {@link SuggestMode#SUGGEST_MORE_POPULAR}
* </p>
*
* @param minSuggestionFrequency
* @see #getMinSuggestionFrequency()
*/
public void setMinSuggestionFrequency(int minSuggestionFrequency) {
this.minSuggestionFrequency = minSuggestionFrequency;
@ -386,7 +418,7 @@ public class WordBreakSpellChecker {
* terms. Default=20
* </p>
*
* @param maxCombineWordLength
* @see #getMaxCombineWordLength()
*/
public void setMaxCombineWordLength(int maxCombineWordLength) {
this.maxCombineWordLength = maxCombineWordLength;
@ -397,7 +429,7 @@ public class WordBreakSpellChecker {
* The minimum length to break words down to. Default=1
* </p>
*
* @param minBreakWordLength
* @see #getMinBreakWordLength()
*/
public void setMinBreakWordLength(int minBreakWordLength) {
this.minBreakWordLength = minBreakWordLength;
@ -409,7 +441,7 @@ public class WordBreakSpellChecker {
* original term(s). Default=1
* </p>
*
* @param maxChanges
* @see #getMaxChanges()
*/
public void setMaxChanges(int maxChanges) {
this.maxChanges = maxChanges;
@ -422,7 +454,7 @@ public class WordBreakSpellChecker {
* performance.
* </p>
*
* @param maxEvaluations
* @see #getMaxEvaluations()
*/
public void setMaxEvaluations(int maxEvaluations) {
this.maxEvaluations = maxEvaluations;

View File

@ -29,11 +29,16 @@ import org.apache.lucene.util.BytesRef;
*/
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
// TODO keep this for now
/** buffered term entries */
protected BytesRefList entries = new BytesRefList();
/** current buffer position */
protected int curPos = -1;
/** buffered weights, parallel with {@link #entries} */
protected long[] freqs = new long[1];
private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comp;
/** Creates a new iterator, buffering entries from the specified iterator */
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
this.comp = source.getComparator();
BytesRef spare;

View File

@ -41,6 +41,11 @@ public class FileDictionary implements Dictionary {
private String line;
private boolean done = false;
/**
* Creates a dictionary based on an input stream.
* <p>
* NOTE: content is treated as UTF-8
*/
public FileDictionary(InputStream dictFile) {
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
}

View File

@ -37,9 +37,14 @@ public abstract class Lookup {
* Result of a lookup.
*/
public static final class LookupResult implements Comparable<LookupResult> {
/** the key's text */
public final CharSequence key;
/** the key's weight */
public final long value;
/**
* Create a new result from a key+weight pair.
*/
public LookupResult(CharSequence key, long value) {
this.key = key;
this.value = value;
@ -86,6 +91,9 @@ public abstract class Lookup {
*/
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
// TODO: should we move this out of the interface into a utility class?
/**
* Creates a new priority queue of the specified size.
*/
public LookupPriorityQueue(int size) {
super(size);
}

View File

@ -47,10 +47,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
private final BytesRef scratch = new BytesRef();
private final Comparator<BytesRef> comparator;
/**
* Calls {@link #SortedTermFreqIteratorWrapper(TermFreqIterator, Comparator, boolean)
* SortedTermFreqIteratorWrapper(source, comparator, false)}
*/
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
this(source, comparator, false);
}
/**
* Creates a new sorted wrapper. If <code>compareRawBytes</code> is true, then
* only the bytes (not the weight) will be used for comparison.
*/
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException {
this.source = source;
this.comparator = comparator;
@ -162,6 +170,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
}
}
/** encodes an entry (bytes+weight) to the provided writer */
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
if (spare.length + 8 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 8);
@ -172,6 +181,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
writer.write(buffer, 0, output.getPosition());
}
/** decodes the weight at the current position */
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
tmpInput.reset(scratch.bytes);
tmpInput.skipBytes(scratch.length - 8); // suggestion + separator

View File

@ -33,6 +33,10 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
private final int[] ords;
private int currentOrd = -1;
private final BytesRef spare = new BytesRef();
/**
* Creates a new iterator, wrapping the specified iterator and
* returning elements in a random order.
*/
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
super(source);
ords = new int[entries.size()];

View File

@ -123,15 +123,26 @@ public final class Sort {
* Sort info (debugging mostly).
*/
public class SortInfo {
/** number of temporary files created when merging partitions */
public int tempMergeFiles;
/** number of partition merges */
public int mergeRounds;
/** number of lines of data read */
public int lines;
/** time spent merging sorted partitions (in milliseconds) */
public long mergeTime;
/** time spent sorting data (in milliseconds) */
public long sortTime;
/** total time spent (in milliseconds) */
public long totalTime;
/** time spent in i/o read (in milliseconds) */
public long readTime;
/** read buffer size (in bytes) */
public final long bufferSize = ramBufferSize.bytes;
/** create a new SortInfo (with empty statistics) for debugging */
public SortInfo() {}
@Override
public String toString() {
return String.format(Locale.ROOT,

View File

@ -44,6 +44,12 @@ public class JaspellLookup extends Lookup {
JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
private boolean usePrefix = true;
private int editDistance = 2;
/**
* Creates a new empty trie
* @see #build(TermFreqIterator)
* */
public JaspellLookup() {}
@Override
public void build(TermFreqIterator tfit) throws IOException {
@ -68,12 +74,22 @@ public class JaspellLookup extends Lookup {
}
}
/**
* Adds a new node if <code>key</code> does not already exist,
* otherwise replaces its value.
* <p>
* This method always returns false.
*/
public boolean add(CharSequence key, Object value) {
trie.put(key, value);
// XXX
return false;
}
/**
* Returns the value for the specified key, or null
* if the key does not exist.
*/
public Object get(CharSequence key) {
return trie.get(key);
}

View File

@ -160,6 +160,10 @@ public class JaspellTernarySearchTrie {
this(Locale.ROOT);
}
/**
* Constructs an empty Ternary Search Trie,
* specifying the Locale used for lowercasing.
*/
public JaspellTernarySearchTrie(Locale locale) {
this.locale = locale;
}

View File

@ -25,6 +25,8 @@ import java.util.*;
* @see TernaryTreeNode
*/
public class TSTAutocomplete {
TSTAutocomplete() {}
/**
* Inserting keys in TST in the order middle,small,big (lexicographic measure)

View File

@ -42,6 +42,12 @@ import org.apache.lucene.util.UnicodeUtil;
public class TSTLookup extends Lookup {
TernaryTreeNode root = new TernaryTreeNode();
TSTAutocomplete autocomplete = new TSTAutocomplete();
/**
* Creates a new TSTLookup with an empty Ternary Search Tree.
* @see #build(TermFreqIterator)
*/
public TSTLookup() {}
@Override
public void build(TermFreqIterator tfit) throws IOException {
@ -65,12 +71,22 @@ public class TSTLookup extends Lookup {
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
}
/**
* Adds a new node if <code>key</code> does not already exist,
* otherwise replaces its value.
* <p>
* This method always returns true.
*/
public boolean add(CharSequence key, Object value) {
autocomplete.insert(root, key, value, 0);
// XXX we don't know if a new node was created
return true;
}
/**
* Returns the value for the specified key, or null
* if the key does not exist.
*/
public Object get(CharSequence key) {
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
if (list == null || list.isEmpty()) {

View File

@ -22,6 +22,9 @@ package org.apache.lucene.search.suggest.tst;
*/
public class TernaryTreeNode {
/** Creates a new empty node */
public TernaryTreeNode() {}
/** the character stored by a node. */
char splitchar;
/** a reference object to the node containing character smaller than this node's character. */