SOLR-2688: switch solr 4.0 example to DirectSpellChecker

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1154935 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-08-08 11:55:03 +00:00
parent f000ddc947
commit 718f42479f
3 changed files with 39 additions and 18 deletions

View File

@ -473,6 +473,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
Collection<Token> result = new ArrayList<Token>();
assert analyzer != null;
TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
ts.reset();
// TODO: support custom attributes
@ -589,6 +590,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
if (initParams.getName(i).equals("spellchecker")) {
NamedList spellchecker = (NamedList) initParams.getVal(i);
String className = (String) spellchecker.get("classname");
// TODO: this is a little bit sneaky: warn if class isn't supplied
// so that it's mandatory in a future release?
if (className == null)
className = IndexBasedSpellChecker.class.getName();
SolrResourceLoader loader = core.getResourceLoader();

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.StringDistance;
@ -30,6 +31,7 @@ import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -65,6 +67,7 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
public static final String SCORE_COMP = AbstractLuceneSpellChecker.SCORE_COMP;
public static final String FREQ_COMP = AbstractLuceneSpellChecker.FREQ_COMP;
public static final String FIELD = AbstractLuceneSpellChecker.FIELD;
public static final String FIELD_TYPE = AbstractLuceneSpellChecker.FIELD_TYPE;
public static final String STRING_DISTANCE = AbstractLuceneSpellChecker.STRING_DISTANCE;
public static final String ACCURACY = AbstractLuceneSpellChecker.ACCURACY;
public static final String THRESHOLD_TOKEN_FREQUENCY = IndexBasedSpellChecker.THRESHOLD_TOKEN_FREQUENCY;
@ -91,6 +94,7 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
private DirectSpellChecker checker = new DirectSpellChecker();
private String field;
private String fieldTypeName;
@Override
public String init(NamedList config, SolrCore core) {
@ -114,6 +118,19 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
sd = (StringDistance) core.getResourceLoader().newInstance(distClass);
field = (String) config.get(FIELD);
// setup analyzer for field
if (field != null && core.getSchema().getFieldTypeNoEx(field) != null) {
analyzer = core.getSchema().getFieldType(field).getQueryAnalyzer();
}
fieldTypeName = (String) config.get(FIELD_TYPE);
if (core.getSchema().getFieldTypes().containsKey(fieldTypeName)) {
FieldType fieldType = core.getSchema().getFieldTypes().get(fieldTypeName);
analyzer = fieldType.getQueryAnalyzer();
}
if (analyzer == null) {
LOG.info("Using WhitespaceAnalyzer for dictionary: " + name);
analyzer = new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion);
}
float minAccuracy = DEFAULT_ACCURACY;
Float accuracy = (Float) config.get(ACCURACY);

View File

@ -1111,37 +1111,39 @@
component
-->
<!-- a spellchecker built from a field of the main index, and
written to disk
-->
<!-- a spellchecker built from a field of the main index -->
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">name</str>
<str name="spellcheckIndexDir">spellchecker</str>
<!-- uncomment this to require terms to occur in 1% of the documents in order to be included in the dictionary
<str name="classname">solr.DirectSolrSpellChecker</str>
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
<str name="distanceMeasure">internal</str>
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
<float name="accuracy">0.5</float>
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
<int name="maxEdits">2</int>
<!-- the minimum shared prefix when enumerating terms -->
<int name="minPrefix">1</int>
<!-- maximum number of inspections per result. -->
<int name="maxInspections">5</int>
<!-- minimum length of a query term to be considered for correction -->
<int name="minQueryLength">4</int>
<!-- maximum threshold of documents a query term can appear in to be considered for correction -->
<float name="maxQueryFrequency">0.01</float>
<!-- uncomment this to require suggestions to occur in 1% of the documents
<float name="thresholdTokenFrequency">.01</float>
-->
</lst>
<!-- a spellchecker that uses no auxiliary on disk index -->
<!--
<lst name="spellchecker">
<str name="name">direct</str>
<str name="field">name</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<int name="minPrefix">1</int>
</lst>
-->
<!-- a spellchecker that uses a different distance measure -->
<!--
<lst name="spellchecker">
<str name="name">jarowinkler</str>
<str name="field">spell</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<str name="distanceMeasure">
org.apache.lucene.search.spell.JaroWinklerDistance
</str>
<str name="spellcheckIndexDir">spellcheckerJaro</str>
</lst>
-->
@ -1156,9 +1158,8 @@
<lst name="spellchecker">
<str name="name">freq</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerFreq</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<str name="comparatorClass">freq</str>
<str name="buildOnCommit">true</str>
-->
<!-- A spellchecker that reads the list of words from a file -->