mirror of https://github.com/apache/lucene.git
javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1388507 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b8f9525142
commit
d4aace1038
|
@ -257,7 +257,7 @@
|
||||||
<!-- queryparser: problems -->
|
<!-- queryparser: problems -->
|
||||||
<!-- sandbox: problems -->
|
<!-- sandbox: problems -->
|
||||||
<!-- spatial: problems -->
|
<!-- spatial: problems -->
|
||||||
<!-- suggest: problems -->
|
<check-missing-javadocs dir="build/docs/suggest" level="method"/>
|
||||||
<!-- test-framework: problems -->
|
<!-- test-framework: problems -->
|
||||||
|
|
||||||
<!-- too much to fix core/ for now, but enforce full javadocs for key packages -->
|
<!-- too much to fix core/ for now, but enforce full javadocs for key packages -->
|
||||||
|
|
|
@ -30,6 +30,11 @@ public class CombineSuggestion {
|
||||||
*/
|
*/
|
||||||
public final SuggestWord suggestion;
|
public final SuggestWord suggestion;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new CombineSuggestion from a <code>suggestion</code> and
|
||||||
|
* an array of term ids (referencing the indexes to the original terms that
|
||||||
|
* form this combined suggestion)
|
||||||
|
*/
|
||||||
public CombineSuggestion (SuggestWord suggestion, int[] originalTermIndexes) {
|
public CombineSuggestion (SuggestWord suggestion, int[] originalTermIndexes) {
|
||||||
this.suggestion = suggestion;
|
this.suggestion = suggestion;
|
||||||
this.originalTermIndexes = originalTermIndexes;
|
this.originalTermIndexes = originalTermIndexes;
|
||||||
|
|
|
@ -42,6 +42,13 @@ public class HighFrequencyDictionary implements Dictionary {
|
||||||
private String field;
|
private String field;
|
||||||
private float thresh;
|
private float thresh;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new Dictionary, pulling source terms from
|
||||||
|
* the specified <code>field</code> in the provided <code>reader</code>.
|
||||||
|
* <p>
|
||||||
|
* Terms appearing in less than <code>thresh</code> percentage of documents
|
||||||
|
* will be excluded.
|
||||||
|
*/
|
||||||
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
|
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
|
|
|
@ -28,6 +28,13 @@ public class JaroWinklerDistance implements StringDistance {
|
||||||
|
|
||||||
private float threshold = 0.7f;
|
private float threshold = 0.7f;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new distance metric with the default threshold
|
||||||
|
* for the Jaro Winkler bonus (0.7)
|
||||||
|
* @see #setThreshold(float)
|
||||||
|
*/
|
||||||
|
public JaroWinklerDistance() {}
|
||||||
|
|
||||||
private int[] matches(String s1, String s2) {
|
private int[] matches(String s1, String s2) {
|
||||||
String max, min;
|
String max, min;
|
||||||
if (s1.length() > s2.length()) {
|
if (s1.length() > s2.length()) {
|
||||||
|
|
|
@ -27,14 +27,15 @@ import java.io.*;
|
||||||
/**
|
/**
|
||||||
* Lucene Dictionary: terms taken from the given field
|
* Lucene Dictionary: terms taken from the given field
|
||||||
* of a Lucene index.
|
* of a Lucene index.
|
||||||
*
|
|
||||||
* When using IndexReader.terms(Term) the code must not call next() on TermEnum
|
|
||||||
* as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
|
|
||||||
*/
|
*/
|
||||||
public class LuceneDictionary implements Dictionary {
|
public class LuceneDictionary implements Dictionary {
|
||||||
private IndexReader reader;
|
private IndexReader reader;
|
||||||
private String field;
|
private String field;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new Dictionary, pulling source terms from
|
||||||
|
* the specified <code>field</code> in the provided <code>reader</code>
|
||||||
|
*/
|
||||||
public LuceneDictionary(IndexReader reader, String field) {
|
public LuceneDictionary(IndexReader reader, String field) {
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
|
|
|
@ -39,6 +39,12 @@ import org.apache.lucene.util.IntsRef;
|
||||||
*/
|
*/
|
||||||
public final class LuceneLevenshteinDistance implements StringDistance {
|
public final class LuceneLevenshteinDistance implements StringDistance {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new comparator, mimicking the behavior of Lucene's internal
|
||||||
|
* edit distance.
|
||||||
|
*/
|
||||||
|
public LuceneLevenshteinDistance() {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getDistance(String target, String other) {
|
public float getDistance(String target, String other) {
|
||||||
IntsRef targetPoints;
|
IntsRef targetPoints;
|
||||||
|
|
|
@ -38,10 +38,20 @@ public class PlainTextDictionary implements Dictionary {
|
||||||
|
|
||||||
private BufferedReader in;
|
private BufferedReader in;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a dictionary based on a File.
|
||||||
|
* <p>
|
||||||
|
* NOTE: content is treated as UTF-8
|
||||||
|
*/
|
||||||
public PlainTextDictionary(File file) throws IOException {
|
public PlainTextDictionary(File file) throws IOException {
|
||||||
in = new BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8));
|
in = new BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a dictionary based on an InputStream.
|
||||||
|
* <p>
|
||||||
|
* NOTE: content is treated as UTF-8
|
||||||
|
*/
|
||||||
public PlainTextDictionary(InputStream dictFile) {
|
public PlainTextDictionary(InputStream dictFile) {
|
||||||
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
|
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,11 @@ package org.apache.lucene.search.spell;
|
||||||
*/
|
*/
|
||||||
public final class SuggestWord{
|
public final class SuggestWord{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new empty suggestion with null text.
|
||||||
|
*/
|
||||||
|
public SuggestWord() {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the score of the word
|
* the score of the word
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -20,11 +20,17 @@ import java.util.Comparator;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Frequency first, then score. Must have
|
* Frequency first, then score.
|
||||||
*
|
*
|
||||||
**/
|
**/
|
||||||
public class SuggestWordFrequencyComparator implements Comparator<SuggestWord> {
|
public class SuggestWordFrequencyComparator implements Comparator<SuggestWord> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new comparator that will compare by {@link SuggestWord#freq},
|
||||||
|
* then by {@link SuggestWord#score}, then by {@link SuggestWord#string}.
|
||||||
|
*/
|
||||||
|
public SuggestWordFrequencyComparator() {}
|
||||||
|
|
||||||
public int compare(SuggestWord first, SuggestWord second) {
|
public int compare(SuggestWord first, SuggestWord second) {
|
||||||
// first criteria: the frequency
|
// first criteria: the frequency
|
||||||
if (first.freq > second.freq) {
|
if (first.freq > second.freq) {
|
||||||
|
|
|
@ -31,6 +31,10 @@ import java.util.Comparator;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
|
public final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
|
||||||
|
/**
|
||||||
|
* Default comparator: score then frequency.
|
||||||
|
* @see SuggestWordScoreComparator
|
||||||
|
*/
|
||||||
public static final Comparator<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator();
|
public static final Comparator<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator();
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,13 @@ import java.util.Comparator;
|
||||||
*
|
*
|
||||||
**/
|
**/
|
||||||
public class SuggestWordScoreComparator implements Comparator<SuggestWord> {
|
public class SuggestWordScoreComparator implements Comparator<SuggestWord> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new comparator that will compare by {@link SuggestWord#score},
|
||||||
|
* then by {@link SuggestWord#freq}, then by {@link SuggestWord#string}.
|
||||||
|
*/
|
||||||
|
public SuggestWordScoreComparator() {}
|
||||||
|
|
||||||
public int compare(SuggestWord first, SuggestWord second) {
|
public int compare(SuggestWord first, SuggestWord second) {
|
||||||
// first criteria: the distance
|
// first criteria: the distance
|
||||||
if (first.score > second.score) {
|
if (first.score > second.score) {
|
||||||
|
|
|
@ -38,6 +38,10 @@ public interface TermFreqIterator extends BytesRefIterator {
|
||||||
public static class TermFreqIteratorWrapper implements TermFreqIterator {
|
public static class TermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
private BytesRefIterator wrapped;
|
private BytesRefIterator wrapped;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new wrapper, wrapping the specified iterator and
|
||||||
|
* specifying a weight value of <code>1</code> for all terms.
|
||||||
|
*/
|
||||||
public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
|
public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
|
||||||
this.wrapped = wrapped;
|
this.wrapped = wrapped;
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,8 +40,19 @@ public class WordBreakSpellChecker {
|
||||||
private int maxChanges = 1;
|
private int maxChanges = 1;
|
||||||
private int maxEvaluations = 1000;
|
private int maxEvaluations = 1000;
|
||||||
|
|
||||||
|
/** Term that can be used to prohibit adjacent terms from being combined */
|
||||||
public static final Term SEPARATOR_TERM = new Term("", "");
|
public static final Term SEPARATOR_TERM = new Term("", "");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new spellchecker with default configuration values
|
||||||
|
* @see #setMaxChanges(int)
|
||||||
|
* @see #setMaxCombineWordLength(int)
|
||||||
|
* @see #setMaxEvaluations(int)
|
||||||
|
* @see #setMinBreakWordLength(int)
|
||||||
|
* @see #setMinSuggestionFrequency(int)
|
||||||
|
*/
|
||||||
|
public WordBreakSpellChecker() {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
* Determines the order to list word break suggestions
|
* Determines the order to list word break suggestions
|
||||||
|
@ -347,22 +358,43 @@ public class WordBreakSpellChecker {
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the minimum frequency a term must have
|
||||||
|
* to be part of a suggestion.
|
||||||
|
* @see #setMinSuggestionFrequency(int)
|
||||||
|
*/
|
||||||
public int getMinSuggestionFrequency() {
|
public int getMinSuggestionFrequency() {
|
||||||
return minSuggestionFrequency;
|
return minSuggestionFrequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the maximum length of a combined suggestion
|
||||||
|
* @see #setMaxCombineWordLength(int)
|
||||||
|
*/
|
||||||
public int getMaxCombineWordLength() {
|
public int getMaxCombineWordLength() {
|
||||||
return maxCombineWordLength;
|
return maxCombineWordLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the minimum size of a broken word
|
||||||
|
* @see #setMinBreakWordLength(int)
|
||||||
|
*/
|
||||||
public int getMinBreakWordLength() {
|
public int getMinBreakWordLength() {
|
||||||
return minBreakWordLength;
|
return minBreakWordLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the maximum number of changes to perform on the input
|
||||||
|
* @see #setMaxChanges(int)
|
||||||
|
*/
|
||||||
public int getMaxChanges() {
|
public int getMaxChanges() {
|
||||||
return maxChanges;
|
return maxChanges;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the maximum number of word combinations to evaluate.
|
||||||
|
* @see #setMaxEvaluations(int)
|
||||||
|
*/
|
||||||
public int getMaxEvaluations() {
|
public int getMaxEvaluations() {
|
||||||
return maxEvaluations;
|
return maxEvaluations;
|
||||||
}
|
}
|
||||||
|
@ -374,7 +406,7 @@ public class WordBreakSpellChecker {
|
||||||
* {@link SuggestMode#SUGGEST_MORE_POPULAR}
|
* {@link SuggestMode#SUGGEST_MORE_POPULAR}
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param minSuggestionFrequency
|
* @see #getMinSuggestionFrequency()
|
||||||
*/
|
*/
|
||||||
public void setMinSuggestionFrequency(int minSuggestionFrequency) {
|
public void setMinSuggestionFrequency(int minSuggestionFrequency) {
|
||||||
this.minSuggestionFrequency = minSuggestionFrequency;
|
this.minSuggestionFrequency = minSuggestionFrequency;
|
||||||
|
@ -386,7 +418,7 @@ public class WordBreakSpellChecker {
|
||||||
* terms. Default=20
|
* terms. Default=20
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param maxCombineWordLength
|
* @see #getMaxCombineWordLength()
|
||||||
*/
|
*/
|
||||||
public void setMaxCombineWordLength(int maxCombineWordLength) {
|
public void setMaxCombineWordLength(int maxCombineWordLength) {
|
||||||
this.maxCombineWordLength = maxCombineWordLength;
|
this.maxCombineWordLength = maxCombineWordLength;
|
||||||
|
@ -397,7 +429,7 @@ public class WordBreakSpellChecker {
|
||||||
* The minimum length to break words down to. Default=1
|
* The minimum length to break words down to. Default=1
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param minBreakWordLength
|
* @see #getMinBreakWordLength()
|
||||||
*/
|
*/
|
||||||
public void setMinBreakWordLength(int minBreakWordLength) {
|
public void setMinBreakWordLength(int minBreakWordLength) {
|
||||||
this.minBreakWordLength = minBreakWordLength;
|
this.minBreakWordLength = minBreakWordLength;
|
||||||
|
@ -409,7 +441,7 @@ public class WordBreakSpellChecker {
|
||||||
* original term(s). Default=1
|
* original term(s). Default=1
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param maxChanges
|
* @see #getMaxChanges()
|
||||||
*/
|
*/
|
||||||
public void setMaxChanges(int maxChanges) {
|
public void setMaxChanges(int maxChanges) {
|
||||||
this.maxChanges = maxChanges;
|
this.maxChanges = maxChanges;
|
||||||
|
@ -422,7 +454,7 @@ public class WordBreakSpellChecker {
|
||||||
* performance.
|
* performance.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param maxEvaluations
|
* @see #getMaxEvaluations()
|
||||||
*/
|
*/
|
||||||
public void setMaxEvaluations(int maxEvaluations) {
|
public void setMaxEvaluations(int maxEvaluations) {
|
||||||
this.maxEvaluations = maxEvaluations;
|
this.maxEvaluations = maxEvaluations;
|
||||||
|
|
|
@ -29,11 +29,16 @@ import org.apache.lucene.util.BytesRef;
|
||||||
*/
|
*/
|
||||||
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
// TODO keep this for now
|
// TODO keep this for now
|
||||||
|
/** buffered term entries */
|
||||||
protected BytesRefList entries = new BytesRefList();
|
protected BytesRefList entries = new BytesRefList();
|
||||||
|
/** current buffer position */
|
||||||
protected int curPos = -1;
|
protected int curPos = -1;
|
||||||
|
/** buffered weights, parallel with {@link #entries} */
|
||||||
protected long[] freqs = new long[1];
|
protected long[] freqs = new long[1];
|
||||||
private final BytesRef spare = new BytesRef();
|
private final BytesRef spare = new BytesRef();
|
||||||
private final Comparator<BytesRef> comp;
|
private final Comparator<BytesRef> comp;
|
||||||
|
|
||||||
|
/** Creates a new iterator, buffering entries from the specified iterator */
|
||||||
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||||
this.comp = source.getComparator();
|
this.comp = source.getComparator();
|
||||||
BytesRef spare;
|
BytesRef spare;
|
||||||
|
|
|
@ -41,6 +41,11 @@ public class FileDictionary implements Dictionary {
|
||||||
private String line;
|
private String line;
|
||||||
private boolean done = false;
|
private boolean done = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a dictionary based on an InputStream.
|
||||||
|
* <p>
|
||||||
|
* NOTE: content is treated as UTF-8
|
||||||
|
*/
|
||||||
public FileDictionary(InputStream dictFile) {
|
public FileDictionary(InputStream dictFile) {
|
||||||
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
|
in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,9 +37,14 @@ public abstract class Lookup {
|
||||||
* Result of a lookup.
|
* Result of a lookup.
|
||||||
*/
|
*/
|
||||||
public static final class LookupResult implements Comparable<LookupResult> {
|
public static final class LookupResult implements Comparable<LookupResult> {
|
||||||
|
/** the key's text */
|
||||||
public final CharSequence key;
|
public final CharSequence key;
|
||||||
|
/** the key's weight */
|
||||||
public final long value;
|
public final long value;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new result from a key+weight pair.
|
||||||
|
*/
|
||||||
public LookupResult(CharSequence key, long value) {
|
public LookupResult(CharSequence key, long value) {
|
||||||
this.key = key;
|
this.key = key;
|
||||||
this.value = value;
|
this.value = value;
|
||||||
|
@ -86,6 +91,9 @@ public abstract class Lookup {
|
||||||
*/
|
*/
|
||||||
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
|
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
|
||||||
// TODO: should we move this out of the interface into a utility class?
|
// TODO: should we move this out of the interface into a utility class?
|
||||||
|
/**
|
||||||
|
* Creates a new priority queue of the specified size.
|
||||||
|
*/
|
||||||
public LookupPriorityQueue(int size) {
|
public LookupPriorityQueue(int size) {
|
||||||
super(size);
|
super(size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,10 +47,18 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
private final BytesRef scratch = new BytesRef();
|
private final BytesRef scratch = new BytesRef();
|
||||||
private final Comparator<BytesRef> comparator;
|
private final Comparator<BytesRef> comparator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calls {@link #SortedTermFreqIteratorWrapper(TermFreqIterator, Comparator, boolean)
|
||||||
|
* SortedTermFreqIteratorWrapper(source, comparator, false)}
|
||||||
|
*/
|
||||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
|
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
|
||||||
this(source, comparator, false);
|
this(source, comparator, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new sorted wrapper. If <code>compareRawBytes</code> is true, then
|
||||||
|
* only the bytes (not the weight) will be used for comparison.
|
||||||
|
*/
|
||||||
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException {
|
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException {
|
||||||
this.source = source;
|
this.source = source;
|
||||||
this.comparator = comparator;
|
this.comparator = comparator;
|
||||||
|
@ -162,6 +170,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** encodes an entry (bytes+weight) to the provided writer */
|
||||||
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
|
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
|
||||||
if (spare.length + 8 >= buffer.length) {
|
if (spare.length + 8 >= buffer.length) {
|
||||||
buffer = ArrayUtil.grow(buffer, spare.length + 8);
|
buffer = ArrayUtil.grow(buffer, spare.length + 8);
|
||||||
|
@ -172,6 +181,7 @@ public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
|
||||||
writer.write(buffer, 0, output.getPosition());
|
writer.write(buffer, 0, output.getPosition());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** decodes the weight at the current position */
|
||||||
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
|
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
|
||||||
tmpInput.reset(scratch.bytes);
|
tmpInput.reset(scratch.bytes);
|
||||||
tmpInput.skipBytes(scratch.length - 8); // suggestion + separator
|
tmpInput.skipBytes(scratch.length - 8); // suggestion + separator
|
||||||
|
|
|
@ -33,6 +33,10 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr
|
||||||
private final int[] ords;
|
private final int[] ords;
|
||||||
private int currentOrd = -1;
|
private int currentOrd = -1;
|
||||||
private final BytesRef spare = new BytesRef();
|
private final BytesRef spare = new BytesRef();
|
||||||
|
/**
|
||||||
|
* Creates a new iterator, wrapping the specified iterator and
|
||||||
|
* returning elements in a random order.
|
||||||
|
*/
|
||||||
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
|
||||||
super(source);
|
super(source);
|
||||||
ords = new int[entries.size()];
|
ords = new int[entries.size()];
|
||||||
|
|
|
@ -123,15 +123,26 @@ public final class Sort {
|
||||||
* Sort info (debugging mostly).
|
* Sort info (debugging mostly).
|
||||||
*/
|
*/
|
||||||
public class SortInfo {
|
public class SortInfo {
|
||||||
|
/** number of temporary files created when merging partitions */
|
||||||
public int tempMergeFiles;
|
public int tempMergeFiles;
|
||||||
|
/** number of partition merges */
|
||||||
public int mergeRounds;
|
public int mergeRounds;
|
||||||
|
/** number of lines of data read */
|
||||||
public int lines;
|
public int lines;
|
||||||
|
/** time spent merging sorted partitions (in milliseconds) */
|
||||||
public long mergeTime;
|
public long mergeTime;
|
||||||
|
/** time spent sorting data (in milliseconds) */
|
||||||
public long sortTime;
|
public long sortTime;
|
||||||
|
/** total time spent (in milliseconds) */
|
||||||
public long totalTime;
|
public long totalTime;
|
||||||
|
/** time spent in i/o read (in milliseconds) */
|
||||||
public long readTime;
|
public long readTime;
|
||||||
|
/** read buffer size (in bytes) */
|
||||||
public final long bufferSize = ramBufferSize.bytes;
|
public final long bufferSize = ramBufferSize.bytes;
|
||||||
|
|
||||||
|
/** create a new SortInfo (with empty statistics) for debugging */
|
||||||
|
public SortInfo() {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format(Locale.ROOT,
|
return String.format(Locale.ROOT,
|
||||||
|
|
|
@ -45,6 +45,12 @@ public class JaspellLookup extends Lookup {
|
||||||
private boolean usePrefix = true;
|
private boolean usePrefix = true;
|
||||||
private int editDistance = 2;
|
private int editDistance = 2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new empty trie
|
||||||
|
* @see #build(TermFreqIterator)
|
||||||
|
* */
|
||||||
|
public JaspellLookup() {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void build(TermFreqIterator tfit) throws IOException {
|
public void build(TermFreqIterator tfit) throws IOException {
|
||||||
if (tfit.getComparator() != null) {
|
if (tfit.getComparator() != null) {
|
||||||
|
@ -68,12 +74,22 @@ public class JaspellLookup extends Lookup {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a new node if <code>key</code> does not already exist,
|
||||||
|
* otherwise replaces its value.
|
||||||
|
* <p>
|
||||||
|
* This method always returns false.
|
||||||
|
*/
|
||||||
public boolean add(CharSequence key, Object value) {
|
public boolean add(CharSequence key, Object value) {
|
||||||
trie.put(key, value);
|
trie.put(key, value);
|
||||||
// XXX
|
// XXX
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the value for the specified key, or null
|
||||||
|
* if the key does not exist.
|
||||||
|
*/
|
||||||
public Object get(CharSequence key) {
|
public Object get(CharSequence key) {
|
||||||
return trie.get(key);
|
return trie.get(key);
|
||||||
}
|
}
|
||||||
|
|
|
@ -160,6 +160,10 @@ public class JaspellTernarySearchTrie {
|
||||||
this(Locale.ROOT);
|
this(Locale.ROOT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs an empty Ternary Search Trie,
|
||||||
|
* specifying the Locale used for lowercasing.
|
||||||
|
*/
|
||||||
public JaspellTernarySearchTrie(Locale locale) {
|
public JaspellTernarySearchTrie(Locale locale) {
|
||||||
this.locale = locale;
|
this.locale = locale;
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,8 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class TSTAutocomplete {
|
public class TSTAutocomplete {
|
||||||
|
|
||||||
|
TSTAutocomplete() {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inserting keys in TST in the order middle,small,big (lexicographic measure)
|
* Inserting keys in TST in the order middle,small,big (lexicographic measure)
|
||||||
* recursively creates a balanced tree which reduces insertion and search
|
* recursively creates a balanced tree which reduces insertion and search
|
||||||
|
|
|
@ -43,6 +43,12 @@ public class TSTLookup extends Lookup {
|
||||||
TernaryTreeNode root = new TernaryTreeNode();
|
TernaryTreeNode root = new TernaryTreeNode();
|
||||||
TSTAutocomplete autocomplete = new TSTAutocomplete();
|
TSTAutocomplete autocomplete = new TSTAutocomplete();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new TSTLookup with an empty Ternary Search Tree.
|
||||||
|
* @see #build(TermFreqIterator)
|
||||||
|
*/
|
||||||
|
public TSTLookup() {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void build(TermFreqIterator tfit) throws IOException {
|
public void build(TermFreqIterator tfit) throws IOException {
|
||||||
root = new TernaryTreeNode();
|
root = new TernaryTreeNode();
|
||||||
|
@ -65,12 +71,22 @@ public class TSTLookup extends Lookup {
|
||||||
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
|
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a new node if <code>key</code> does not already exist,
|
||||||
|
* otherwise replaces its value.
|
||||||
|
* <p>
|
||||||
|
* This method always returns true.
|
||||||
|
*/
|
||||||
public boolean add(CharSequence key, Object value) {
|
public boolean add(CharSequence key, Object value) {
|
||||||
autocomplete.insert(root, key, value, 0);
|
autocomplete.insert(root, key, value, 0);
|
||||||
// XXX we don't know if a new node was created
|
// XXX we don't know if a new node was created
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the value for the specified key, or null
|
||||||
|
* if the key does not exist.
|
||||||
|
*/
|
||||||
public Object get(CharSequence key) {
|
public Object get(CharSequence key) {
|
||||||
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
|
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
|
||||||
if (list == null || list.isEmpty()) {
|
if (list == null || list.isEmpty()) {
|
||||||
|
|
|
@ -22,6 +22,9 @@ package org.apache.lucene.search.suggest.tst;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class TernaryTreeNode {
|
public class TernaryTreeNode {
|
||||||
|
|
||||||
|
/** Creates a new empty node */
|
||||||
|
public TernaryTreeNode() {}
|
||||||
/** the character stored by a node. */
|
/** the character stored by a node. */
|
||||||
char splitchar;
|
char splitchar;
|
||||||
/** a reference object to the node containing character smaller than this node's character. */
|
/** a reference object to the node containing character smaller than this node's character. */
|
||||||
|
|
Loading…
Reference in New Issue