mirror of https://github.com/apache/lucene.git
javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1388507 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b8f9525142
commit
d4aace1038
|
@ -257,7 +257,7 @@
|
|||
<!-- queryparser: problems -->
|
||||
<!-- sandbox: problems -->
|
||||
<!-- spatial: problems -->
|
||||
<!-- suggest: problems -->
|
||||
<check-missing-javadocs dir="build/docs/suggest" level="method"/>
|
||||
<!-- test-framework: problems -->
|
||||
|
||||
<!-- too much to fix core/ for now, but enforce full javadocs for key packages -->
|
||||
|
|
|
@ -30,6 +30,11 @@ public class CombineSuggestion {
|
|||
*/
|
||||
public final SuggestWord suggestion;
|
||||
|
||||
/**
|
||||
* Creates a new CombineSuggestion from a <code>suggestion</code> and
|
||||
* an array of term ids (referencing the indexes to the original terms that
|
||||
* form this combined suggestion)
|
||||
*/
|
||||
public CombineSuggestion (SuggestWord suggestion, int[] originalTermIndexes) {
|
||||
this.suggestion = suggestion;
|
||||
this.originalTermIndexes = originalTermIndexes;
|
||||
|
|
|
@ -42,6 +42,13 @@ public class HighFrequencyDictionary implements Dictionary {
|
|||
private String field;
|
||||
private float thresh;
|
||||
|
||||
/**
|
||||
* Creates a new Dictionary, pulling source terms from
|
||||
* the specified <code>field</code> in the provided <code>reader</code>.
|
||||
* <p>
|
||||
* Terms appearing in less than <code>thres</code> percentage of documents
|
||||
* will be excluded.
|
||||
*/
|
||||
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
|
||||
this.reader = reader;
|
||||
this.field = field;
|
||||
|
|
|
@ -27,6 +27,13 @@ import java.util.Arrays;
|
|||
public class JaroWinklerDistance implements StringDistance {
|
||||
|
||||
private float threshold = 0.7f;
|
||||
|
||||
/**
|
||||
* Creates a new distance metric with the default threshold
|
||||
* for the Jaro Winkler bonus (0.7)
|
||||
* @see #setThreshold(float)
|
||||
*/
|
||||
public JaroWinklerDistance() {}
|
||||
|
||||
private int[] matches(String s1, String s2) {
|
||||
String max, min;
|
||||
|
|
|
@ -27,14 +27,15 @@ import java.io.*;
|
|||
/**
|
||||
* Lucene Dictionary: terms taken from the given field
|
||||
* of a Lucene index.
|
||||
*
|
||||
* When using IndexReader.terms(Term) the code must not call next() on TermEnum
|
||||
* as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
|
||||
*/
|
||||
public class LuceneDictionary implements Dictionary {
|
||||
private IndexReader reader;
|
||||
private String field;
|
||||
|
||||
/**
|
||||
* Creates a new Dictionary, pulling source terms from
|
||||
* the specified <code>field</code> in the provided <code>reader</code>
|
||||
*/
|
||||
public LuceneDictionary(IndexReader reader, String field) {
|
||||
this.reader = reader;
|
||||
this.field = field;
|
||||
|
|
|
@ -38,6 +38,12 @@ import org.apache.lucene.util.IntsRef;
|
|||
* for merging results from multiple DirectSpellCheckers.
|
||||
*/
|
||||
public final class LuceneLevenshteinDistance implements StringDistance {
|
||||
|
||||
/**
|
||||
* Creates a new comparator, mimicing the behavior of Lucene's internal
|
||||
* edit distance.
|
||||
*/
|
||||
public LuceneLevenshteinDistance() {}
|
||||
|
||||
@Override
|
||||
public float getDistance(String target, String other) {
|
||||
|
|
|
@ -38,10 +38,20 @@ public class PlainTextDictionary implements Dictionary {
|
|||
|
||||
private BufferedReader in;
|
||||
|
||||
/**
|
||||
* Creates a dictionary based on a File.
|
||||
* <p>
|
||||
* NOTE: content is treated as UTF-8
|
||||
*/
|
||||
public PlainTextDictionary(File file) throws IOException {
|
||||
in = new BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a dictionary based on an inputstream.
|
||||
* <p>
|
||||
* NOTE: content is treated as UTF-8
|
||||
*/
|
||||
public PlainTextDictionary(InputStream dictFile) {
|
||||
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
|
||||
}
|
||||
|
|
|
@ -27,6 +27,11 @@ package org.apache.lucene.search.spell;
|
|||
*/
|
||||
public final class SuggestWord{
|
||||
|
||||
/**
|
||||
* Creates a new empty suggestion with null text.
|
||||
*/
|
||||
public SuggestWord() {}
|
||||
|
||||
/**
|
||||
* the score of the word
|
||||
*/
|
||||
|
|
|
@ -20,10 +20,16 @@ import java.util.Comparator;
|
|||
|
||||
|
||||
/**
|
||||
* Frequency first, then score. Must have
|
||||
* Frequency first, then score.
|
||||
*
|
||||
**/
|
||||
public class SuggestWordFrequencyComparator implements Comparator<SuggestWord> {
|
||||
|
||||
/**
|
||||
* Creates a new comparator that will compare by {@link SuggestWord#freq},
|
||||
* then by {@link SuggestWord#score}, then by {@link SuggestWord#string}.
|
||||
*/
|
||||
public SuggestWordFrequencyComparator() {}
|
||||
|
||||
public int compare(SuggestWord first, SuggestWord second) {
|
||||
// first criteria: the frequency
|
||||
|
|
|
@ -31,6 +31,10 @@ import java.util.Comparator;
|
|||
*
|
||||
*/
|
||||
public final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
|
||||
/**
|
||||
* Default comparator: score then frequency.
|
||||
* @see SuggestWordScoreComparator
|
||||
*/
|
||||
public static final Comparator<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator();
|
||||
|
||||
|
||||
|
|
|
@ -24,6 +24,13 @@ import java.util.Comparator;
|
|||
*
|
||||
**/
|
||||
public class SuggestWordScoreComparator implements Comparator<SuggestWord> {
|
||||
|
||||
/**
|
||||
* Creates a new comparator that will compare by {@link SuggestWord#score},
|
||||
* then by {@link SuggestWord#freq}, then by {@link SuggestWord#string}.
|
||||
*/
|
||||
public SuggestWordScoreComparator() {}
|
||||
|
||||
public int compare(SuggestWord first, SuggestWord second) {
|
||||
// first criteria: the distance
|
||||
if (first.score > second.score) {
|
||||
|
|
|
@ -38,6 +38,10 @@ public interface TermFreqIterator extends BytesRefIterator {
|
|||
public static class TermFreqIteratorWrapper implements TermFreqIterator {
|
||||
private BytesRefIterator wrapped;
|
||||
|
||||
/**
|
||||
* Creates a new wrapper, wrapping the specified iterator and
|
||||
* specifying a weight value of <code>1</code> for all terms.
|
||||
*/
|
||||
public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
|
||||
this.wrapped = wrapped;
|
||||
}
|
||||
|
|
|
@ -40,8 +40,19 @@ public class WordBreakSpellChecker {
|
|||
private int maxChanges = 1;
|
||||
private int maxEvaluations = 1000;
|
||||
|
||||
/** Term that can be used to prohibit adjacent terms from being combined */
|
||||
public static final Term SEPARATOR_TERM = new Term("", "");
|
||||
|
||||
/**
|
||||
* Creates a new spellchecker with default configuration values
|
||||
* @see #setMaxChanges(int)
|
||||
* @see #setMaxCombineWordLength(int)
|
||||
* @see #setMaxEvaluations(int)
|
||||
* @see #setMinBreakWordLength(int)
|
||||
* @see #setMinSuggestionFrequency(int)
|
||||
*/
|
||||
public WordBreakSpellChecker() {}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Determines the order to list word break suggestions
|
||||
|
@ -347,22 +358,43 @@ public class WordBreakSpellChecker {
|
|||
return word;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the minimum frequency a term must have
|
||||
* to be part of a suggestion.
|
||||
* @see #setMinSuggestionFrequency(int)
|
||||
*/
|
||||
public int getMinSuggestionFrequency() {
|
||||
return minSuggestionFrequency;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum length of a combined suggestion
|
||||
* @see #setMaxCombineWordLength(int)
|
||||
*/
|
||||
public int getMaxCombineWordLength() {
|
||||
return maxCombineWordLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the minimum size of a broken word
|
||||
* @see #setMinBreakWordLength(int)
|
||||
*/
|
||||
public int getMinBreakWordLength() {
|
||||
return minBreakWordLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum number of changes to perform on the input
|
||||
* @see #setMaxChanges(int)
|
||||
*/
|
||||
public int getMaxChanges() {
|
||||
return maxChanges;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum number of word combinations to evaluate.
|
||||
* @see #setMaxEvaluations(int)
|
||||
*/
|
||||
public int getMaxEvaluations() {
|
||||
return maxEvaluations;
|
||||
}
|
||||
|
@ -374,7 +406,7 @@ public class WordBreakSpellChecker {
|
|||
* {@link SuggestMode#SUGGEST_MORE_POPULAR}
|
||||
* </p>
|
||||
*
|
||||
* @param minSuggestionFrequency
|
||||
* @see #getMinSuggestionFrequency()
|
||||
*/
|
||||
public void setMinSuggestionFrequency(int minSuggestionFrequency) {
|
||||
this.minSuggestionFrequency = minSuggestionFrequency;
|
||||
|
@ -386,7 +418,7 @@ public class WordBreakSpellChecker {
|
|||
* terms. Default=20
|
||||
* </p>
|
||||
*
|
||||
* @param maxCombineWordLength
|
||||
* @see #getMaxCombineWordLength()
|
||||
*/
|
||||
public void setMaxCombineWordLength(int maxCombineWordLength) {
|
||||
this.maxCombineWordLength = maxCombineWordLength;
|
||||
|
@ -397,7 +429,7 @@ public class WordBreakSpellChecker {
|
|||
* The minimum length to break words down to. Default=1
|
||||
* </p>
|
||||
*
|
||||
* @param minBreakWordLength
|
||||
* @see #getMinBreakWordLength()
|
||||
*/
|
||||
public void setMinBreakWordLength(int minBreakWordLength) {
|
||||
this.minBreakWordLength = minBreakWordLength;
|
||||
|
@ -409,7 +441,7 @@ public class WordBreakSpellChecker {
|
|||
* original term(s). Default=1
|
||||
* </p>
|
||||
*
|
||||
* @param maxChanges
|
||||
* @see #getMaxChanges()
|
||||
*/
|
||||
public void setMaxChanges(int maxChanges) {
|
||||
this.maxChanges = maxChanges;
|
||||
|
@ -422,7 +454,7 @@ public class WordBreakSpellChecker {
|
|||
* performance.
|
||||
* </p>
|
||||
*
|
||||
* @param maxEvaluations
|
||||
* @see #getMaxEvaluations()
|
||||
*/
|
||||
public void setMaxEvaluations(int maxEvaluations) {
|
||||
this.maxEvaluations = maxEvaluations;
|
||||
|
|
|
@ -29,11 +29,16 @@ import org.apache.lucene.util.BytesRef;
|
|||
*/
|
||||
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
||||
// TODO keep this for now
|
||||
/** buffered term entries */
|
||||
protected BytesRefList entries = new BytesRefList();
|
||||
/** current buffer position */
|
||||
protected int curPos = -1;
|
||||
/** buffered weights, parallel with {@link #entries} */
|
||||
protected long[] freqs = new long[1];
|
||||
private final BytesRef spare = new BytesRef();
|
||||
private final Comparator<BytesRef> comp;
|
||||
|
||||
/** Creates a new iterator, buffering entries from the specified iterator */
|
||||
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||
this.comp = source.getComparator();
|
||||
BytesRef spare;
|
||||
|
|
|
@ -41,6 +41,11 @@ public class FileDictionary implements Dictionary {
|
|||
private String line;
|
||||
private boolean done = false;
|
||||
|
||||
/**
|
||||
* Creates a dictionary based on an inputstream.
|
||||
* <p>
|
||||
* NOTE: content is treated as UTF-8
|
||||
*/
|
||||
public FileDictionary(InputStream dictFile) {
|
||||
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
|
||||
}
|
||||
|
|
|
@ -37,9 +37,14 @@ public abstract class Lookup {
|
|||
* Result of a lookup.
|
||||
*/
|
||||
public static final class LookupResult implements Comparable<LookupResult> {
|
||||
/** the key's text */
|
||||
public final CharSequence key;
|
||||
/** the key's weight */
|
||||
public final long value;
|
||||
|
||||
/**
|
||||
* Create a new result from a key+weight pair.
|
||||
*/
|
||||
public LookupResult(CharSequence key, long value) {
|
||||
this.key = key;
|
||||
this.value = value;
|
||||
|
@ -86,6 +91,9 @@ public abstract class Lookup {
|
|||
*/
|
||||
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
|
||||
// TODO: should we move this out of the interface into a utility class?
|
||||
/**
|
||||
* Creates a new priority queue of the specified size.
|
||||
*/
|
||||
public LookupPriorityQueue(int size) {
|
||||
super(size);
|
||||
}
|
||||
|
|
|
@ -47,10 +47,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
private final BytesRef scratch = new BytesRef();
|
||||
private final Comparator<BytesRef> comparator;
|
||||
|
||||
/**
|
||||
* Calls {@link #SortedTermFreqIteratorWrapper(TermFreqIterator, Comparator, boolean)
|
||||
* SortedTermFreqIteratorWrapper(source, comparator, false)}
|
||||
*/
|
||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
|
||||
this(source, comparator, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new sorted wrapper. if <code>compareRawBytes</code> is true, then
|
||||
* only the bytes (not the weight) will be used for comparison.
|
||||
*/
|
||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException {
|
||||
this.source = source;
|
||||
this.comparator = comparator;
|
||||
|
@ -162,6 +170,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
}
|
||||
}
|
||||
|
||||
/** encodes an entry (bytes+weight) to the provided writer */
|
||||
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
|
||||
if (spare.length + 8 >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, spare.length + 8);
|
||||
|
@ -172,6 +181,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
|||
writer.write(buffer, 0, output.getPosition());
|
||||
}
|
||||
|
||||
/** decodes the weight at the current position */
|
||||
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
|
||||
tmpInput.reset(scratch.bytes);
|
||||
tmpInput.skipBytes(scratch.length - 8); // suggestion + separator
|
||||
|
|
|
@ -33,6 +33,10 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
|
|||
private final int[] ords;
|
||||
private int currentOrd = -1;
|
||||
private final BytesRef spare = new BytesRef();
|
||||
/**
|
||||
* Creates a new iterator, wrapping the specified iterator and
|
||||
* returning elements in a random order.
|
||||
*/
|
||||
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||
super(source);
|
||||
ords = new int[entries.size()];
|
||||
|
|
|
@ -123,15 +123,26 @@ public final class Sort {
|
|||
* Sort info (debugging mostly).
|
||||
*/
|
||||
public class SortInfo {
|
||||
/** number of temporary files created when merging partitions */
|
||||
public int tempMergeFiles;
|
||||
/** number of partition merges */
|
||||
public int mergeRounds;
|
||||
/** number of lines of data read */
|
||||
public int lines;
|
||||
/** time spent merging sorted partitions (in milliseconds) */
|
||||
public long mergeTime;
|
||||
/** time spent sorting data (in milliseconds) */
|
||||
public long sortTime;
|
||||
/** total time spent (in milliseconds) */
|
||||
public long totalTime;
|
||||
/** time spent in i/o read (in milliseconds) */
|
||||
public long readTime;
|
||||
/** read buffer size (in bytes) */
|
||||
public final long bufferSize = ramBufferSize.bytes;
|
||||
|
||||
/** create a new SortInfo (with empty statistics) for debugging */
|
||||
public SortInfo() {}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT,
|
||||
|
|
|
@ -44,6 +44,12 @@ public class JaspellLookup extends Lookup {
|
|||
JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
|
||||
private boolean usePrefix = true;
|
||||
private int editDistance = 2;
|
||||
|
||||
/**
|
||||
* Creates a new empty trie
|
||||
* @see #build(TermFreqIterator)
|
||||
* */
|
||||
public JaspellLookup() {}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
|
@ -68,12 +74,22 @@ public class JaspellLookup extends Lookup {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new node if <code>key</code> already exists,
|
||||
* otherwise replaces its value.
|
||||
* <p>
|
||||
* This method always returns false.
|
||||
*/
|
||||
public boolean add(CharSequence key, Object value) {
|
||||
trie.put(key, value);
|
||||
// XXX
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value for the specified key, or null
|
||||
* if the key does not exist.
|
||||
*/
|
||||
public Object get(CharSequence key) {
|
||||
return trie.get(key);
|
||||
}
|
||||
|
|
|
@ -160,6 +160,10 @@ public class JaspellTernarySearchTrie {
|
|||
this(Locale.ROOT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an empty Ternary Search Trie,
|
||||
* specifying the Locale used for lowercasing.
|
||||
*/
|
||||
public JaspellTernarySearchTrie(Locale locale) {
|
||||
this.locale = locale;
|
||||
}
|
||||
|
|
|
@ -25,6 +25,8 @@ import java.util.*;
|
|||
* @see TernaryTreeNode
|
||||
*/
|
||||
public class TSTAutocomplete {
|
||||
|
||||
TSTAutocomplete() {}
|
||||
|
||||
/**
|
||||
* Inserting keys in TST in the order middle,small,big (lexicographic measure)
|
||||
|
|
|
@ -42,6 +42,12 @@ import org.apache.lucene.util.UnicodeUtil;
|
|||
public class TSTLookup extends Lookup {
|
||||
TernaryTreeNode root = new TernaryTreeNode();
|
||||
TSTAutocomplete autocomplete = new TSTAutocomplete();
|
||||
|
||||
/**
|
||||
* Creates a new TSTLookup with an empty Ternary Search Tree.
|
||||
* @see #build(TermFreqIterator)
|
||||
*/
|
||||
public TSTLookup() {}
|
||||
|
||||
@Override
|
||||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
|
@ -65,12 +71,22 @@ public class TSTLookup extends Lookup {
|
|||
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new node if <code>key</code> already exists,
|
||||
* otherwise replaces its value.
|
||||
* <p>
|
||||
* This method always returns true.
|
||||
*/
|
||||
public boolean add(CharSequence key, Object value) {
|
||||
autocomplete.insert(root, key, value, 0);
|
||||
// XXX we don't know if a new node was created
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value for the specified key, or null
|
||||
* if the key does not exist.
|
||||
*/
|
||||
public Object get(CharSequence key) {
|
||||
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
|
||||
if (list == null || list.isEmpty()) {
|
||||
|
|
|
@ -22,6 +22,9 @@ package org.apache.lucene.search.suggest.tst;
|
|||
*/
|
||||
|
||||
public class TernaryTreeNode {
|
||||
|
||||
/** Creates a new empty node */
|
||||
public TernaryTreeNode() {}
|
||||
/** the character stored by a node. */
|
||||
char splitchar;
|
||||
/** a reference object to the node containing character smaller than this node's character. */
|
||||
|
|
Loading…
Reference in New Issue