mirror of https://github.com/apache/lucene.git
SOLR-2688: switch solr 4.0 example to DirectSpellChecker
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1154935 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f000ddc947
commit
718f42479f
|
@ -473,6 +473,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
|
||||
Collection<Token> result = new ArrayList<Token>();
|
||||
assert analyzer != null;
|
||||
TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
|
||||
ts.reset();
|
||||
// TODO: support custom attributes
|
||||
|
@ -589,6 +590,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
if (initParams.getName(i).equals("spellchecker")) {
|
||||
NamedList spellchecker = (NamedList) initParams.getVal(i);
|
||||
String className = (String) spellchecker.get("classname");
|
||||
// TODO: this is a little bit sneaky: warn if class isnt supplied
|
||||
// so that its mandatory in a future release?
|
||||
if (className == null)
|
||||
className = IndexBasedSpellChecker.class.getName();
|
||||
SolrResourceLoader loader = core.getResourceLoader();
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
|
@ -30,6 +31,7 @@ import org.apache.lucene.search.spell.SuggestWordQueue;
|
|||
import org.apache.solr.common.params.SpellingParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -65,6 +67,7 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
|
|||
public static final String SCORE_COMP = AbstractLuceneSpellChecker.SCORE_COMP;
|
||||
public static final String FREQ_COMP = AbstractLuceneSpellChecker.FREQ_COMP;
|
||||
public static final String FIELD = AbstractLuceneSpellChecker.FIELD;
|
||||
public static final String FIELD_TYPE = AbstractLuceneSpellChecker.FIELD_TYPE;
|
||||
public static final String STRING_DISTANCE = AbstractLuceneSpellChecker.STRING_DISTANCE;
|
||||
public static final String ACCURACY = AbstractLuceneSpellChecker.ACCURACY;
|
||||
public static final String THRESHOLD_TOKEN_FREQUENCY = IndexBasedSpellChecker.THRESHOLD_TOKEN_FREQUENCY;
|
||||
|
@ -91,6 +94,7 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
|
|||
|
||||
private DirectSpellChecker checker = new DirectSpellChecker();
|
||||
private String field;
|
||||
private String fieldTypeName;
|
||||
|
||||
@Override
|
||||
public String init(NamedList config, SolrCore core) {
|
||||
|
@ -114,6 +118,19 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
|
|||
sd = (StringDistance) core.getResourceLoader().newInstance(distClass);
|
||||
|
||||
field = (String) config.get(FIELD);
|
||||
// setup analyzer for field
|
||||
if (field != null && core.getSchema().getFieldTypeNoEx(field) != null) {
|
||||
analyzer = core.getSchema().getFieldType(field).getQueryAnalyzer();
|
||||
}
|
||||
fieldTypeName = (String) config.get(FIELD_TYPE);
|
||||
if (core.getSchema().getFieldTypes().containsKey(fieldTypeName)) {
|
||||
FieldType fieldType = core.getSchema().getFieldTypes().get(fieldTypeName);
|
||||
analyzer = fieldType.getQueryAnalyzer();
|
||||
}
|
||||
if (analyzer == null) {
|
||||
LOG.info("Using WhitespaceAnalyzer for dictionary: " + name);
|
||||
analyzer = new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion);
|
||||
}
|
||||
|
||||
float minAccuracy = DEFAULT_ACCURACY;
|
||||
Float accuracy = (Float) config.get(ACCURACY);
|
||||
|
|
|
@ -1111,37 +1111,39 @@
|
|||
component
|
||||
-->
|
||||
|
||||
<!-- a spellchecker built from a field of the main index, and
|
||||
written to disk
|
||||
-->
|
||||
<!-- a spellchecker built from a field of the main index -->
|
||||
<lst name="spellchecker">
|
||||
<str name="name">default</str>
|
||||
<str name="field">name</str>
|
||||
<str name="spellcheckIndexDir">spellchecker</str>
|
||||
<!-- uncomment this to require terms to occur in 1% of the documents in order to be included in the dictionary
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
||||
<str name="distanceMeasure">internal</str>
|
||||
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
|
||||
<float name="accuracy">0.5</float>
|
||||
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
|
||||
<int name="maxEdits">2</int>
|
||||
<!-- the minimum shared prefix when enumerating terms -->
|
||||
<int name="minPrefix">1</int>
|
||||
<!-- maximum number of inspections per result. -->
|
||||
<int name="maxInspections">5</int>
|
||||
<!-- minimum length of a query term to be considered for correction -->
|
||||
<int name="minQueryLength">4</int>
|
||||
<!-- maximum threshold of documents a query term can appear to be considered for correction -->
|
||||
<float name="maxQueryFrequency">0.01</float>
|
||||
<!-- uncomment this to require suggestions to occur in 1% of the documents
|
||||
<float name="thresholdTokenFrequency">.01</float>
|
||||
-->
|
||||
</lst>
|
||||
|
||||
<!-- a spellchecker that uses no auxiliary on disk index -->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="name">direct</str>
|
||||
<str name="field">name</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<int name="minPrefix">1</int>
|
||||
</lst>
|
||||
-->
|
||||
|
||||
<!-- a spellchecker that uses a different distance measure -->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="name">jarowinkler</str>
|
||||
<str name="field">spell</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<str name="distanceMeasure">
|
||||
org.apache.lucene.search.spell.JaroWinklerDistance
|
||||
</str>
|
||||
<str name="spellcheckIndexDir">spellcheckerJaro</str>
|
||||
</lst>
|
||||
-->
|
||||
|
||||
|
@ -1156,9 +1158,8 @@
|
|||
<lst name="spellchecker">
|
||||
<str name="name">freq</str>
|
||||
<str name="field">lowerfilt</str>
|
||||
<str name="spellcheckIndexDir">spellcheckerFreq</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<str name="comparatorClass">freq</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
-->
|
||||
|
||||
<!-- A spellchecker that reads the list of words from a file -->
|
||||
|
|
Loading…
Reference in New Issue