mirror of https://github.com/apache/lucene.git
LUCENE-4684: Made DirectSpellChecker extendable.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1433431 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9e3740088c
commit
d3971b0c1e
|
@ -32,8 +32,10 @@ Changes in backwards compatibility policy
|
|||
|
||||
======================= Lucene 4.2.0 =======================
|
||||
|
||||
(No changes yet)
|
||||
API Changes
|
||||
|
||||
* LUCENE-4684: Made DirectSpellChecker extendable.
|
||||
(Martijn van Groningen)
|
||||
|
||||
======================= Lucene 4.1.0 =======================
|
||||
|
||||
|
|
|
@ -17,20 +17,12 @@ package org.apache.lucene.search.spell;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.PriorityQueue;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.FuzzyTermsEnum;
|
||||
import org.apache.lucene.search.BoostAttribute;
|
||||
import org.apache.lucene.search.FuzzyTermsEnum;
|
||||
import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
|
@ -39,6 +31,14 @@ import org.apache.lucene.util.CharsRef;
|
|||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* Simple automaton-based spellchecker.
|
||||
* <p>
|
||||
|
@ -65,28 +65,28 @@ public class DirectSpellChecker {
|
|||
public static final StringDistance INTERNAL_LEVENSHTEIN = new LuceneLevenshteinDistance();
|
||||
|
||||
/** maximum edit distance for candidate terms */
|
||||
private int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
|
||||
protected int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
|
||||
/** minimum prefix for candidate terms */
|
||||
private int minPrefix = 1;
|
||||
protected int minPrefix = 1;
|
||||
/** maximum number of top-N inspections per suggestion */
|
||||
private int maxInspections = 5;
|
||||
protected int maxInspections = 5;
|
||||
/** minimum accuracy for a term to match */
|
||||
private float accuracy = SpellChecker.DEFAULT_ACCURACY;
|
||||
protected float accuracy = SpellChecker.DEFAULT_ACCURACY;
|
||||
/** value in [0..1] (or absolute number >=1) representing the minimum
|
||||
* number of documents (of the total) where a term should appear. */
|
||||
private float thresholdFrequency = 0f;
|
||||
protected float thresholdFrequency = 0f;
|
||||
/** minimum length of a query word to return suggestions */
|
||||
private int minQueryLength = 4;
|
||||
protected int minQueryLength = 4;
|
||||
/** value in [0..1] (or absolute number >=1) representing the maximum
|
||||
* number of documents (of the total) a query term can appear in to
|
||||
* be corrected. */
|
||||
private float maxQueryFrequency = 0.01f;
|
||||
protected float maxQueryFrequency = 0.01f;
|
||||
/** true if the spellchecker should lowercase terms */
|
||||
private boolean lowerCaseTerms = true;
|
||||
protected boolean lowerCaseTerms = true;
|
||||
/** the comparator to use */
|
||||
private Comparator<SuggestWord> comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||
protected Comparator<SuggestWord> comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||
/** the string distance to use */
|
||||
private StringDistance distance = INTERNAL_LEVENSHTEIN;
|
||||
protected StringDistance distance = INTERNAL_LEVENSHTEIN;
|
||||
|
||||
/** Creates a DirectSpellChecker with default configuration values */
|
||||
public DirectSpellChecker() {}
|
||||
|
@ -384,8 +384,8 @@ public class DirectSpellChecker {
|
|||
}
|
||||
return suggestions;
|
||||
}
|
||||
|
||||
private Collection<ScoreTerm> suggestSimilar(Term term, int numSug,
|
||||
|
||||
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug,
|
||||
IndexReader ir, int docfreq, int editDistance, float accuracy, final CharsRef spare) throws IOException {
|
||||
|
||||
AttributeSource atts = new AttributeSource();
|
||||
|
@ -449,8 +449,8 @@ public class DirectSpellChecker {
|
|||
|
||||
return stQueue;
|
||||
}
|
||||
|
||||
private static class ScoreTerm implements Comparable<ScoreTerm> {
|
||||
|
||||
protected static class ScoreTerm implements Comparable<ScoreTerm> {
|
||||
public BytesRef term;
|
||||
public float boost;
|
||||
public int docfreq;
|
||||
|
|
Loading…
Reference in New Issue