mirror of
https://github.com/apache/lucene.git
synced 2025-02-10 20:15:18 +00:00
LUCENE-4684: Made DirectSpellChecker extendable.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1433431 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9e3740088c
commit
d3971b0c1e
@ -32,8 +32,10 @@ Changes in backwards compatibility policy
|
|||||||
|
|
||||||
======================= Lucene 4.2.0 =======================
|
======================= Lucene 4.2.0 =======================
|
||||||
|
|
||||||
(No changes yet)
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-4684: Made DirectSpellChecker extendable.
|
||||||
|
(Martijn van Groningen)
|
||||||
|
|
||||||
======================= Lucene 4.1.0 =======================
|
======================= Lucene 4.1.0 =======================
|
||||||
|
|
||||||
|
@ -17,20 +17,12 @@ package org.apache.lucene.search.spell;
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.PriorityQueue;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.MultiFields;
|
import org.apache.lucene.index.MultiFields;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.search.FuzzyTermsEnum;
|
|
||||||
import org.apache.lucene.search.BoostAttribute;
|
import org.apache.lucene.search.BoostAttribute;
|
||||||
|
import org.apache.lucene.search.FuzzyTermsEnum;
|
||||||
import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
|
import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
@ -39,6 +31,14 @@ import org.apache.lucene.util.CharsRef;
|
|||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple automaton-based spellchecker.
|
* Simple automaton-based spellchecker.
|
||||||
* <p>
|
* <p>
|
||||||
@ -65,28 +65,28 @@ public class DirectSpellChecker {
|
|||||||
public static final StringDistance INTERNAL_LEVENSHTEIN = new LuceneLevenshteinDistance();
|
public static final StringDistance INTERNAL_LEVENSHTEIN = new LuceneLevenshteinDistance();
|
||||||
|
|
||||||
/** maximum edit distance for candidate terms */
|
/** maximum edit distance for candidate terms */
|
||||||
private int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
|
protected int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
|
||||||
/** minimum prefix for candidate terms */
|
/** minimum prefix for candidate terms */
|
||||||
private int minPrefix = 1;
|
protected int minPrefix = 1;
|
||||||
/** maximum number of top-N inspections per suggestion */
|
/** maximum number of top-N inspections per suggestion */
|
||||||
private int maxInspections = 5;
|
protected int maxInspections = 5;
|
||||||
/** minimum accuracy for a term to match */
|
/** minimum accuracy for a term to match */
|
||||||
private float accuracy = SpellChecker.DEFAULT_ACCURACY;
|
protected float accuracy = SpellChecker.DEFAULT_ACCURACY;
|
||||||
/** value in [0..1] (or absolute number >=1) representing the minimum
|
/** value in [0..1] (or absolute number >=1) representing the minimum
|
||||||
* number of documents (of the total) where a term should appear. */
|
* number of documents (of the total) where a term should appear. */
|
||||||
private float thresholdFrequency = 0f;
|
protected float thresholdFrequency = 0f;
|
||||||
/** minimum length of a query word to return suggestions */
|
/** minimum length of a query word to return suggestions */
|
||||||
private int minQueryLength = 4;
|
protected int minQueryLength = 4;
|
||||||
/** value in [0..1] (or absolute number >=1) representing the maximum
|
/** value in [0..1] (or absolute number >=1) representing the maximum
|
||||||
* number of documents (of the total) a query term can appear in to
|
* number of documents (of the total) a query term can appear in to
|
||||||
* be corrected. */
|
* be corrected. */
|
||||||
private float maxQueryFrequency = 0.01f;
|
protected float maxQueryFrequency = 0.01f;
|
||||||
/** true if the spellchecker should lowercase terms */
|
/** true if the spellchecker should lowercase terms */
|
||||||
private boolean lowerCaseTerms = true;
|
protected boolean lowerCaseTerms = true;
|
||||||
/** the comparator to use */
|
/** the comparator to use */
|
||||||
private Comparator<SuggestWord> comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
|
protected Comparator<SuggestWord> comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||||
/** the string distance to use */
|
/** the string distance to use */
|
||||||
private StringDistance distance = INTERNAL_LEVENSHTEIN;
|
protected StringDistance distance = INTERNAL_LEVENSHTEIN;
|
||||||
|
|
||||||
/** Creates a DirectSpellChecker with default configuration values */
|
/** Creates a DirectSpellChecker with default configuration values */
|
||||||
public DirectSpellChecker() {}
|
public DirectSpellChecker() {}
|
||||||
@ -385,7 +385,7 @@ public class DirectSpellChecker {
|
|||||||
return suggestions;
|
return suggestions;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Collection<ScoreTerm> suggestSimilar(Term term, int numSug,
|
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug,
|
||||||
IndexReader ir, int docfreq, int editDistance, float accuracy, final CharsRef spare) throws IOException {
|
IndexReader ir, int docfreq, int editDistance, float accuracy, final CharsRef spare) throws IOException {
|
||||||
|
|
||||||
AttributeSource atts = new AttributeSource();
|
AttributeSource atts = new AttributeSource();
|
||||||
@ -450,7 +450,7 @@ public class DirectSpellChecker {
|
|||||||
return stQueue;
|
return stQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class ScoreTerm implements Comparable<ScoreTerm> {
|
protected static class ScoreTerm implements Comparable<ScoreTerm> {
|
||||||
public BytesRef term;
|
public BytesRef term;
|
||||||
public float boost;
|
public float boost;
|
||||||
public int docfreq;
|
public int docfreq;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user