SOLR-2688: switch solr 4.0 example to DirectSpellChecker

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1154935 13f79535-47bb-0310-9956-ffa450edef68
2025-02-06 10:08:58 +00:00 · 2011-08-08 11:55:03 +00:00 · 2011-08-08 11:55:03 +00:00 · 718f42479f
commit 718f42479f
parent f000ddc947
3 changed files with 39 additions and 18 deletions
--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@ -473,6 +473,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar

  private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> result = new ArrayList<Token>();
+    assert analyzer != null;
    TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
    ts.reset();
    // TODO: support custom attributes
@ -589,6 +590,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
        if (initParams.getName(i).equals("spellchecker")) {
          NamedList spellchecker = (NamedList) initParams.getVal(i);
          String className = (String) spellchecker.get("classname");
+          // TODO: this is a little bit sneaky: warn if class isnt supplied
+          // so that its mandatory in a future release?
          if (className == null)
            className = IndexBasedSpellChecker.class.getName();
          SolrResourceLoader loader = core.getResourceLoader();
--- a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Comparator;

 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.StringDistance;
@ -30,6 +31,7 @@ import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.solr.common.params.SpellingParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.FieldType;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -65,6 +67,7 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
  public static final String SCORE_COMP = AbstractLuceneSpellChecker.SCORE_COMP;
  public static final String FREQ_COMP = AbstractLuceneSpellChecker.FREQ_COMP;
  public static final String FIELD = AbstractLuceneSpellChecker.FIELD;
+  public static final String FIELD_TYPE = AbstractLuceneSpellChecker.FIELD_TYPE;
  public static final String STRING_DISTANCE = AbstractLuceneSpellChecker.STRING_DISTANCE;
  public static final String ACCURACY = AbstractLuceneSpellChecker.ACCURACY;
  public static final String THRESHOLD_TOKEN_FREQUENCY = IndexBasedSpellChecker.THRESHOLD_TOKEN_FREQUENCY;
@ -91,6 +94,7 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
  
  private DirectSpellChecker checker = new DirectSpellChecker();
  private String field;
+  private String fieldTypeName;
  
  @Override
  public String init(NamedList config, SolrCore core) {
@ -114,6 +118,19 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
      sd = (StringDistance) core.getResourceLoader().newInstance(distClass);

    field = (String) config.get(FIELD);
+    // setup analyzer for field
+    if (field != null && core.getSchema().getFieldTypeNoEx(field) != null)  {
+      analyzer = core.getSchema().getFieldType(field).getQueryAnalyzer();
+    }
+    fieldTypeName = (String) config.get(FIELD_TYPE);
+    if (core.getSchema().getFieldTypes().containsKey(fieldTypeName))  {
+      FieldType fieldType = core.getSchema().getFieldTypes().get(fieldTypeName);
+      analyzer = fieldType.getQueryAnalyzer();
+    }
+    if (analyzer == null)   {
+      LOG.info("Using WhitespaceAnalyzer for dictionary: " + name);
+      analyzer = new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion);
+    }
    
    float minAccuracy = DEFAULT_ACCURACY;
    Float accuracy = (Float) config.get(ACCURACY);
--- a/solr/example/solr/conf/solrconfig.xml
+++ b/solr/example/solr/conf/solrconfig.xml
@ -1111,37 +1111,39 @@
         component
      -->

-    <!-- a spellchecker built from a field of the main index, and
-         written to disk
-      -->
+    <!-- a spellchecker built from a field of the main index -->
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">name</str>
-      <str name="spellcheckIndexDir">spellchecker</str>
-      <!-- uncomment this to require terms to occur in 1% of the documents in order to be included in the dictionary
+      <str name="classname">solr.DirectSolrSpellChecker</str>
+      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
+      <str name="distanceMeasure">internal</str>
+      <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
+      <float name="accuracy">0.5</float>
+      <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
+      <int name="maxEdits">2</int>
+      <!-- the minimum shared prefix when enumerating terms -->
+      <int name="minPrefix">1</int>
+      <!-- maximum number of inspections per result. -->
+      <int name="maxInspections">5</int>
+      <!-- minimum length of a query term to be considered for correction -->
+      <int name="minQueryLength">4</int>
+      <!-- maximum threshold of documents a query term can appear to be considered for correction -->
+      <float name="maxQueryFrequency">0.01</float>
+      <!-- uncomment this to require suggestions to occur in 1% of the documents
      	<float name="thresholdTokenFrequency">.01</float>
      -->
    </lst>

-    <!-- a spellchecker that uses no auxiliary on disk index -->
-    <!--
-       <lst name="spellchecker">
-         <str name="name">direct</str>
-         <str name="field">name</str>
-         <str name="classname">solr.DirectSolrSpellChecker</str>
-         <int name="minPrefix">1</int>
-       </lst>
-      -->
-
    <!-- a spellchecker that uses a different distance measure -->
    <!--
       <lst name="spellchecker">
         <str name="name">jarowinkler</str>
         <str name="field">spell</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
         <str name="distanceMeasure">
           org.apache.lucene.search.spell.JaroWinklerDistance
         </str>
-         <str name="spellcheckIndexDir">spellcheckerJaro</str>
       </lst>
     -->

@ -1156,9 +1158,8 @@
       <lst name="spellchecker">
         <str name="name">freq</str>
         <str name="field">lowerfilt</str>
-         <str name="spellcheckIndexDir">spellcheckerFreq</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
         <str name="comparatorClass">freq</str>
-         <str name="buildOnCommit">true</str>
      -->

    <!-- A spellchecker that reads the list of words from a file -->