git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1388507 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-21 14:09:02 +00:00
parent b8f9525142
commit d4aace1038
24 changed files with 188 additions and 10 deletions

View File

@ -257,7 +257,7 @@
<!-- queryparser: problems --> <!-- queryparser: problems -->
<!-- sandbox: problems --> <!-- sandbox: problems -->
<!-- spatial: problems --> <!-- spatial: problems -->
<!-- suggest: problems --> <check-missing-javadocs dir="build/docs/suggest" level="method"/>
<!-- test-framework: problems --> <!-- test-framework: problems -->
<!-- too much to fix core/ for now, but enforce full javadocs for key packages --> <!-- too much to fix core/ for now, but enforce full javadocs for key packages -->

View File

@ -30,6 +30,11 @@ public class CombineSuggestion {
*/ */
public final SuggestWord suggestion; public final SuggestWord suggestion;
/**
* Creates a new CombineSuggestion from a <code>suggestion</code> and
* an array of term ids (referencing the indexes to the original terms that
* form this combined suggestion)
*/
public CombineSuggestion (SuggestWord suggestion, int[] originalTermIndexes) { public CombineSuggestion (SuggestWord suggestion, int[] originalTermIndexes) {
this.suggestion = suggestion; this.suggestion = suggestion;
this.originalTermIndexes = originalTermIndexes; this.originalTermIndexes = originalTermIndexes;

View File

@ -42,6 +42,13 @@ public class HighFrequencyDictionary implements Dictionary {
private String field; private String field;
private float thresh; private float thresh;
/**
* Creates a new Dictionary, pulling source terms from
* the specified <code>field</code> in the provided <code>reader</code>.
* <p>
* Terms appearing in less than <code>thresh</code> percentage of documents
* will be excluded.
*/
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) { public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
this.reader = reader; this.reader = reader;
this.field = field; this.field = field;

View File

@ -28,6 +28,13 @@ public class JaroWinklerDistance implements StringDistance {
private float threshold = 0.7f; private float threshold = 0.7f;
/**
* Creates a new distance metric with the default threshold
* for the Jaro Winkler bonus (0.7)
* @see #setThreshold(float)
*/
public JaroWinklerDistance() {}
private int[] matches(String s1, String s2) { private int[] matches(String s1, String s2) {
String max, min; String max, min;
if (s1.length() > s2.length()) { if (s1.length() > s2.length()) {

View File

@ -27,14 +27,15 @@ import java.io.*;
/** /**
* Lucene Dictionary: terms taken from the given field * Lucene Dictionary: terms taken from the given field
* of a Lucene index. * of a Lucene index.
*
* When using IndexReader.terms(Term) the code must not call next() on TermEnum
* as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
*/ */
public class LuceneDictionary implements Dictionary { public class LuceneDictionary implements Dictionary {
private IndexReader reader; private IndexReader reader;
private String field; private String field;
/**
* Creates a new Dictionary, pulling source terms from
* the specified <code>field</code> in the provided <code>reader</code>
*/
public LuceneDictionary(IndexReader reader, String field) { public LuceneDictionary(IndexReader reader, String field) {
this.reader = reader; this.reader = reader;
this.field = field; this.field = field;

View File

@ -39,6 +39,12 @@ import org.apache.lucene.util.IntsRef;
*/ */
public final class LuceneLevenshteinDistance implements StringDistance { public final class LuceneLevenshteinDistance implements StringDistance {
/**
* Creates a new comparator, mimicking the behavior of Lucene's internal
* edit distance.
*/
public LuceneLevenshteinDistance() {}
@Override @Override
public float getDistance(String target, String other) { public float getDistance(String target, String other) {
IntsRef targetPoints; IntsRef targetPoints;

View File

@ -38,10 +38,20 @@ public class PlainTextDictionary implements Dictionary {
private BufferedReader in; private BufferedReader in;
/**
* Creates a dictionary based on a File.
* <p>
* NOTE: content is treated as UTF-8
*/
public PlainTextDictionary(File file) throws IOException { public PlainTextDictionary(File file) throws IOException {
in = new BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8)); in = new BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8));
} }
/**
* Creates a dictionary based on an InputStream.
* <p>
* NOTE: content is treated as UTF-8
*/
public PlainTextDictionary(InputStream dictFile) { public PlainTextDictionary(InputStream dictFile) {
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8)); in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
} }

View File

@ -27,6 +27,11 @@ package org.apache.lucene.search.spell;
*/ */
public final class SuggestWord{ public final class SuggestWord{
/**
* Creates a new empty suggestion with null text.
*/
public SuggestWord() {}
/** /**
* the score of the word * the score of the word
*/ */

View File

@ -20,11 +20,17 @@ import java.util.Comparator;
/** /**
* Frequency first, then score. Must have * Frequency first, then score.
* *
**/ **/
public class SuggestWordFrequencyComparator implements Comparator<SuggestWord> { public class SuggestWordFrequencyComparator implements Comparator<SuggestWord> {
/**
* Creates a new comparator that will compare by {@link SuggestWord#freq},
* then by {@link SuggestWord#score}, then by {@link SuggestWord#string}.
*/
public SuggestWordFrequencyComparator() {}
public int compare(SuggestWord first, SuggestWord second) { public int compare(SuggestWord first, SuggestWord second) {
// first criteria: the frequency // first criteria: the frequency
if (first.freq > second.freq) { if (first.freq > second.freq) {

View File

@ -31,6 +31,10 @@ import java.util.Comparator;
* *
*/ */
public final class SuggestWordQueue extends PriorityQueue<SuggestWord> { public final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
/**
* Default comparator: score then frequency.
* @see SuggestWordScoreComparator
*/
public static final Comparator<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator(); public static final Comparator<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator();

View File

@ -24,6 +24,13 @@ import java.util.Comparator;
* *
**/ **/
public class SuggestWordScoreComparator implements Comparator<SuggestWord> { public class SuggestWordScoreComparator implements Comparator<SuggestWord> {
/**
* Creates a new comparator that will compare by {@link SuggestWord#score},
* then by {@link SuggestWord#freq}, then by {@link SuggestWord#string}.
*/
public SuggestWordScoreComparator() {}
public int compare(SuggestWord first, SuggestWord second) { public int compare(SuggestWord first, SuggestWord second) {
// first criteria: the distance // first criteria: the distance
if (first.score > second.score) { if (first.score > second.score) {

View File

@ -38,6 +38,10 @@ public interface TermFreqIterator extends BytesRefIterator {
public static class TermFreqIteratorWrapper implements TermFreqIterator { public static class TermFreqIteratorWrapper implements TermFreqIterator {
private BytesRefIterator wrapped; private BytesRefIterator wrapped;
/**
* Creates a new wrapper, wrapping the specified iterator and
* specifying a weight value of <code>1</code> for all terms.
*/
public TermFreqIteratorWrapper(BytesRefIterator wrapped) { public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
this.wrapped = wrapped; this.wrapped = wrapped;
} }

View File

@ -40,8 +40,19 @@ public class WordBreakSpellChecker {
private int maxChanges = 1; private int maxChanges = 1;
private int maxEvaluations = 1000; private int maxEvaluations = 1000;
/** Term that can be used to prohibit adjacent terms from being combined */
public static final Term SEPARATOR_TERM = new Term("", ""); public static final Term SEPARATOR_TERM = new Term("", "");
/**
* Creates a new spellchecker with default configuration values
* @see #setMaxChanges(int)
* @see #setMaxCombineWordLength(int)
* @see #setMaxEvaluations(int)
* @see #setMinBreakWordLength(int)
* @see #setMinSuggestionFrequency(int)
*/
public WordBreakSpellChecker() {}
/** /**
* <p> * <p>
* Determines the order to list word break suggestions * Determines the order to list word break suggestions
@ -347,22 +358,43 @@ public class WordBreakSpellChecker {
return word; return word;
} }
/**
* Returns the minimum frequency a term must have
* to be part of a suggestion.
* @see #setMinSuggestionFrequency(int)
*/
public int getMinSuggestionFrequency() { public int getMinSuggestionFrequency() {
return minSuggestionFrequency; return minSuggestionFrequency;
} }
/**
* Returns the maximum length of a combined suggestion
* @see #setMaxCombineWordLength(int)
*/
public int getMaxCombineWordLength() { public int getMaxCombineWordLength() {
return maxCombineWordLength; return maxCombineWordLength;
} }
/**
* Returns the minimum size of a broken word
* @see #setMinBreakWordLength(int)
*/
public int getMinBreakWordLength() { public int getMinBreakWordLength() {
return minBreakWordLength; return minBreakWordLength;
} }
/**
* Returns the maximum number of changes to perform on the input
* @see #setMaxChanges(int)
*/
public int getMaxChanges() { public int getMaxChanges() {
return maxChanges; return maxChanges;
} }
/**
* Returns the maximum number of word combinations to evaluate.
* @see #setMaxEvaluations(int)
*/
public int getMaxEvaluations() { public int getMaxEvaluations() {
return maxEvaluations; return maxEvaluations;
} }
@ -374,7 +406,7 @@ public class WordBreakSpellChecker {
* {@link SuggestMode#SUGGEST_MORE_POPULAR} * {@link SuggestMode#SUGGEST_MORE_POPULAR}
* </p> * </p>
* *
* @param minSuggestionFrequency * @see #getMinSuggestionFrequency()
*/ */
public void setMinSuggestionFrequency(int minSuggestionFrequency) { public void setMinSuggestionFrequency(int minSuggestionFrequency) {
this.minSuggestionFrequency = minSuggestionFrequency; this.minSuggestionFrequency = minSuggestionFrequency;
@ -386,7 +418,7 @@ public class WordBreakSpellChecker {
* terms. Default=20 * terms. Default=20
* </p> * </p>
* *
* @param maxCombineWordLength * @see #getMaxCombineWordLength()
*/ */
public void setMaxCombineWordLength(int maxCombineWordLength) { public void setMaxCombineWordLength(int maxCombineWordLength) {
this.maxCombineWordLength = maxCombineWordLength; this.maxCombineWordLength = maxCombineWordLength;
@ -397,7 +429,7 @@ public class WordBreakSpellChecker {
* The minimum length to break words down to. Default=1 * The minimum length to break words down to. Default=1
* </p> * </p>
* *
* @param minBreakWordLength * @see #getMinBreakWordLength()
*/ */
public void setMinBreakWordLength(int minBreakWordLength) { public void setMinBreakWordLength(int minBreakWordLength) {
this.minBreakWordLength = minBreakWordLength; this.minBreakWordLength = minBreakWordLength;
@ -409,7 +441,7 @@ public class WordBreakSpellChecker {
* original term(s). Default=1 * original term(s). Default=1
* </p> * </p>
* *
* @param maxChanges * @see #getMaxChanges()
*/ */
public void setMaxChanges(int maxChanges) { public void setMaxChanges(int maxChanges) {
this.maxChanges = maxChanges; this.maxChanges = maxChanges;
@ -422,7 +454,7 @@ public class WordBreakSpellChecker {
* performance. * performance.
* </p> * </p>
* *
* @param maxEvaluations * @see #getMaxEvaluations()
*/ */
public void setMaxEvaluations(int maxEvaluations) { public void setMaxEvaluations(int maxEvaluations) {
this.maxEvaluations = maxEvaluations; this.maxEvaluations = maxEvaluations;

View File

@ -29,11 +29,16 @@ import org.apache.lucene.util.BytesRef;
*/ */
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
// TODO keep this for now // TODO keep this for now
/** buffered term entries */
protected BytesRefList entries = new BytesRefList(); protected BytesRefList entries = new BytesRefList();
/** current buffer position */
protected int curPos = -1; protected int curPos = -1;
/** buffered weights, parallel with {@link #entries} */
protected long[] freqs = new long[1]; protected long[] freqs = new long[1];
private final BytesRef spare = new BytesRef(); private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comp; private final Comparator<BytesRef> comp;
/** Creates a new iterator, buffering entries from the specified iterator */
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
this.comp = source.getComparator(); this.comp = source.getComparator();
BytesRef spare; BytesRef spare;

View File

@ -41,6 +41,11 @@ public class FileDictionary implements Dictionary {
private String line; private String line;
private boolean done = false; private boolean done = false;
/**
* Creates a dictionary based on an InputStream.
* <p>
* NOTE: content is treated as UTF-8
*/
public FileDictionary(InputStream dictFile) { public FileDictionary(InputStream dictFile) {
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8)); in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
} }

View File

@ -37,9 +37,14 @@ public abstract class Lookup {
* Result of a lookup. * Result of a lookup.
*/ */
public static final class LookupResult implements Comparable<LookupResult> { public static final class LookupResult implements Comparable<LookupResult> {
/** the key's text */
public final CharSequence key; public final CharSequence key;
/** the key's weight */
public final long value; public final long value;
/**
* Create a new result from a key+weight pair.
*/
public LookupResult(CharSequence key, long value) { public LookupResult(CharSequence key, long value) {
this.key = key; this.key = key;
this.value = value; this.value = value;
@ -86,6 +91,9 @@ public abstract class Lookup {
*/ */
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> { public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
// TODO: should we move this out of the interface into a utility class? // TODO: should we move this out of the interface into a utility class?
/**
* Creates a new priority queue of the specified size.
*/
public LookupPriorityQueue(int size) { public LookupPriorityQueue(int size) {
super(size); super(size);
} }

View File

@ -47,10 +47,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
private final BytesRef scratch = new BytesRef(); private final BytesRef scratch = new BytesRef();
private final Comparator<BytesRef> comparator; private final Comparator<BytesRef> comparator;
/**
* Calls {@link #SortedTermFreqIteratorWrapper(TermFreqIterator, Comparator, boolean)
* SortedTermFreqIteratorWrapper(source, comparator, false)}
*/
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException { public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
this(source, comparator, false); this(source, comparator, false);
} }
/**
* Creates a new sorted wrapper. If <code>compareRawBytes</code> is true, then
* only the bytes (not the weight) will be used for comparison.
*/
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException { public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException {
this.source = source; this.source = source;
this.comparator = comparator; this.comparator = comparator;
@ -162,6 +170,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
} }
} }
/** encodes an entry (bytes+weight) to the provided writer */
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException { protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
if (spare.length + 8 >= buffer.length) { if (spare.length + 8 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 8); buffer = ArrayUtil.grow(buffer, spare.length + 8);
@ -172,6 +181,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
writer.write(buffer, 0, output.getPosition()); writer.write(buffer, 0, output.getPosition());
} }
/** decodes the weight at the current position */
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) { protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
tmpInput.reset(scratch.bytes); tmpInput.reset(scratch.bytes);
tmpInput.skipBytes(scratch.length - 8); // suggestion + separator tmpInput.skipBytes(scratch.length - 8); // suggestion + separator

View File

@ -33,6 +33,10 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
private final int[] ords; private final int[] ords;
private int currentOrd = -1; private int currentOrd = -1;
private final BytesRef spare = new BytesRef(); private final BytesRef spare = new BytesRef();
/**
* Creates a new iterator, wrapping the specified iterator and
* returning elements in a random order.
*/
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
super(source); super(source);
ords = new int[entries.size()]; ords = new int[entries.size()];

View File

@ -123,15 +123,26 @@ public final class Sort {
* Sort info (debugging mostly). * Sort info (debugging mostly).
*/ */
public class SortInfo { public class SortInfo {
/** number of temporary files created when merging partitions */
public int tempMergeFiles; public int tempMergeFiles;
/** number of partition merges */
public int mergeRounds; public int mergeRounds;
/** number of lines of data read */
public int lines; public int lines;
/** time spent merging sorted partitions (in milliseconds) */
public long mergeTime; public long mergeTime;
/** time spent sorting data (in milliseconds) */
public long sortTime; public long sortTime;
/** total time spent (in milliseconds) */
public long totalTime; public long totalTime;
/** time spent in i/o read (in milliseconds) */
public long readTime; public long readTime;
/** read buffer size (in bytes) */
public final long bufferSize = ramBufferSize.bytes; public final long bufferSize = ramBufferSize.bytes;
/** create a new SortInfo (with empty statistics) for debugging */
public SortInfo() {}
@Override @Override
public String toString() { public String toString() {
return String.format(Locale.ROOT, return String.format(Locale.ROOT,

View File

@ -45,6 +45,12 @@ public class JaspellLookup extends Lookup {
private boolean usePrefix = true; private boolean usePrefix = true;
private int editDistance = 2; private int editDistance = 2;
/**
* Creates a new empty trie
* @see #build(TermFreqIterator)
*/
public JaspellLookup() {}
@Override @Override
public void build(TermFreqIterator tfit) throws IOException { public void build(TermFreqIterator tfit) throws IOException {
if (tfit.getComparator() != null) { if (tfit.getComparator() != null) {
@ -68,12 +74,22 @@ public class JaspellLookup extends Lookup {
} }
} }
/**
* Adds a new node if <code>key</code> does not already exist,
* otherwise replaces its value.
* <p>
* This method always returns false.
*/
public boolean add(CharSequence key, Object value) { public boolean add(CharSequence key, Object value) {
trie.put(key, value); trie.put(key, value);
// XXX // XXX
return false; return false;
} }
/**
* Returns the value for the specified key, or null
* if the key does not exist.
*/
public Object get(CharSequence key) { public Object get(CharSequence key) {
return trie.get(key); return trie.get(key);
} }

View File

@ -160,6 +160,10 @@ public class JaspellTernarySearchTrie {
this(Locale.ROOT); this(Locale.ROOT);
} }
/**
* Constructs an empty Ternary Search Trie,
* specifying the Locale used for lowercasing.
*/
public JaspellTernarySearchTrie(Locale locale) { public JaspellTernarySearchTrie(Locale locale) {
this.locale = locale; this.locale = locale;
} }

View File

@ -26,6 +26,8 @@ import java.util.*;
*/ */
public class TSTAutocomplete { public class TSTAutocomplete {
TSTAutocomplete() {}
/** /**
* Inserting keys in TST in the order middle,small,big (lexicographic measure) * Inserting keys in TST in the order middle,small,big (lexicographic measure)
* recursively creates a balanced tree which reduces insertion and search * recursively creates a balanced tree which reduces insertion and search

View File

@ -43,6 +43,12 @@ public class TSTLookup extends Lookup {
TernaryTreeNode root = new TernaryTreeNode(); TernaryTreeNode root = new TernaryTreeNode();
TSTAutocomplete autocomplete = new TSTAutocomplete(); TSTAutocomplete autocomplete = new TSTAutocomplete();
/**
* Creates a new TSTLookup with an empty Ternary Search Tree.
* @see #build(TermFreqIterator)
*/
public TSTLookup() {}
@Override @Override
public void build(TermFreqIterator tfit) throws IOException { public void build(TermFreqIterator tfit) throws IOException {
root = new TernaryTreeNode(); root = new TernaryTreeNode();
@ -65,12 +71,22 @@ public class TSTLookup extends Lookup {
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root); autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
} }
/**
* Adds a new node if <code>key</code> does not already exist,
* otherwise replaces its value.
* <p>
* This method always returns true.
*/
public boolean add(CharSequence key, Object value) { public boolean add(CharSequence key, Object value) {
autocomplete.insert(root, key, value, 0); autocomplete.insert(root, key, value, 0);
// XXX we don't know if a new node was created // XXX we don't know if a new node was created
return true; return true;
} }
/**
* Returns the value for the specified key, or null
* if the key does not exist.
*/
public Object get(CharSequence key) { public Object get(CharSequence key) {
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0); List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
if (list == null || list.isEmpty()) { if (list == null || list.isEmpty()) {

View File

@ -22,6 +22,9 @@ package org.apache.lucene.search.suggest.tst;
*/ */
public class TernaryTreeNode { public class TernaryTreeNode {
/** Creates a new empty node */
public TernaryTreeNode() {}
/** the character stored by a node. */ /** the character stored by a node. */
char splitchar; char splitchar;
/** a reference object to the node containing character smaller than this node's character. */ /** a reference object to the node containing character smaller than this node's character. */