SOLR-81: more spellchecker enhancements, cleaned up schema/solrconfig, added support for relative paths as well as RAM based spellchecker directories, fixed NPE when using cmd=rebuild, added cmd=reopen

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@522628 13f79535-47bb-0310-9956-ffa450edef68
2007-03-26 21:11:13 +00:00 · 2007-03-26 21:11:13 +00:00 · d50b65bc82
parent 876dd28846
commit d50b65bc82
4 changed files with 87 additions and 17 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -128,6 +128,10 @@ New Features
 17. SOLR-182: allow lazy loading of request handlers on first request.
    (Ryan McKinley via yonik)

+18. SOLR-81: More SpellCheckerRequestHandler enhancements, inlcluding
+    support for relative or absolute directory path configurations, as
+    well as RAM based directory. (hossman)   
+    
 Changes in runtime behavior
 1. Highlighting using DisMax will only pick up terms from the main 
    user query, not boost or filter queries (klaas).
--- a/example/solr/conf/schema.xml
+++ b/example/solr/conf/schema.xml
@ -252,6 +252,14 @@
   <field name="popularity" type="sint" indexed="true" stored="true" default="0"/>
   <field name="inStock" type="boolean" indexed="true" stored="true"/>

+   <!-- Some sample docs exists solely to demonstrate the spellchecker
+        functionality, this is the only field they container.
+        Typically you might build the spellchecker of "catchall" type field
+        containing all of the text in each document
+     -->
+   <field name="word" type="string" indexed="true" stored="true"/>
+
+   
   <!-- catchall field, containing all other searchable text fields (implemented
        via copyField further on in this schema  -->
   <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
--- a/example/solr/conf/solrconfig.xml
+++ b/example/solr/conf/solrconfig.xml
@ -353,11 +353,20 @@
     <lst name="defaults">
       <int name="suggestionCount">1</int>
       <float name="accuracy">0.5</float>
-       <str name="version">1.0</str>
     </lst>
-     <!-- main init params for handler --> 
-     <str name="spellcheckerIndexDir">/home/otis/dev/repos/lucene/solr/trunk/example/solr/data/index</str>
+     
+     <!-- Main init params for handler -->
+     
+     <!-- The directory where your SpellChecker Index should live.   -->
+     <!-- May by absolute, or relative to the Solr "dataDir" directory. -->
+     <!-- If this option is not specified, a ram directory will be used -->
+     <str name="spellcheckerIndexDir">spell</str>
+     
+     <!-- the field in your schema that you want to be able to build -->
+     <!-- your spell index on. This should be a field that uses a very -->
+     <!-- simple FieldType without a lot of Analysis (ie: string) -->
     <str name="termSourceField">word</str>
+     
   </requestHandler>

  
--- a/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
+++ b/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
@ -22,15 +22,21 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.LuceneDictionary;
 import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.solr.request.SolrParams;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrQueryResponse;
 import org.apache.solr.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrException;

+import java.io.File;
 import java.io.IOException;
 import java.net.URL;
 import java.util.Arrays;
+import java.util.logging.Logger;

 /**
 * Takes a string (e.g. a query string) as the value of the "q" parameter
@ -42,8 +48,10 @@ import java.util.Arrays;
 */
 public class SpellCheckerRequestHandler extends RequestHandlerBase {

+  private static Logger log = Logger.getLogger(SpellCheckerRequestHandler.class.getName());
+  
    private SpellChecker spellChecker;
-
+  
    /*
     * From http://wiki.apache.org/jakarta-lucene/SpellChecker
     * If reader and restrictToField are both not null:
@ -57,11 +65,12 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
     * return only the words more frequent than this.
     * 
     */
-    private static IndexReader reader = null;
+    private static IndexReader nullReader = null;
    private String restrictToField = null;
    private boolean onlyMorePopular = false;

-    private String spellcheckerIndexDir;
+    private Directory spellcheckerIndexDir = new RAMDirectory();
+    private String dirDescription = "(ramdir)";
    private String termSourceField;
    private static final float DEFAULT_ACCURACY = 0.5f;
    private static final int DEFAULT_NUM_SUGGESTIONS = 1;
@ -70,11 +79,23 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
        super.init(args);
        SolrParams p = SolrParams.toSolrParams(args);
        termSourceField = p.get("termSourceField");
-        spellcheckerIndexDir = p.get("spellcheckerIndexDir");
+
        try {
-            spellChecker = new SpellChecker(FSDirectory.getDirectory(spellcheckerIndexDir));
+          String dir = p.get("spellcheckerIndexDir");
+          if (null != dir) {
+            File f = new File(dir);
+            if ( ! f.isAbsolute() ) {
+              f = new File(SolrCore.getSolrCore().getDataDir(), dir);
+            }
+            dirDescription = f.getAbsolutePath();
+            log.info("using spell directory: " + dirDescription);
+            spellcheckerIndexDir = FSDirectory.getDirectory(f);
+          } else {
+            log.info("using RAM based spell directory");
+          }
+          spellChecker = new SpellChecker(spellcheckerIndexDir);
        } catch (IOException e) {
-            throw new RuntimeException("Cannot open SpellChecker index", e);
+          throw new RuntimeException("Cannot open SpellChecker index", e);
        }
    }

@ -83,8 +104,18 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
        SolrParams p = req.getParams();
        String words = p.get("q");
        String cmd = p.get("cmd");
-        if (cmd != null && cmd.equals("rebuild"))
+        if (cmd != null) {
+          cmd = cmd.trim();
+          if (cmd.equals("rebuild")) {
            rebuild(req);
+            rsp.add("cmdExecuted","rebuild");
+          } else if (cmd.equals("reopen")) {
+            reopen();
+            rsp.add("cmdExecuted","reopen");
+          } else {
+            throw new SolrException(400, "Unrecognized Command: " + cmd);
+          }
+        }

        Float accuracy;
        int numSug;
@ -100,20 +131,38 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
            throw new RuntimeException("Spelling suggestion count must be a valid positive integer", e);
        }

-        String[] suggestions = spellChecker.suggestSimilar(words, numSug,
-                reader, restrictToField, onlyMorePopular);
-
-        rsp.add("suggestions", Arrays.asList(suggestions));
+        if (null != words && !"".equals(words.trim())) {
+          String[] suggestions =
+            spellChecker.suggestSimilar(words, numSug,
+                                        nullReader, restrictToField,
+                                        onlyMorePopular);
+          
+          rsp.add("suggestions", Arrays.asList(suggestions));
+        }
    }

    /** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the
     * index pointed to by the current {@link IndexSearcher}.
     */
-    private void rebuild(SolrQueryRequest req) throws IOException {
+    private void rebuild(SolrQueryRequest req) throws IOException, SolrException {
+      if (null == termSourceField) {
+        throw new SolrException
+          (500, "can't rebuild spellchecker index without termSourceField configured");
+      }
+      
        IndexReader indexReader = req.getSearcher().getReader();
        Dictionary dictionary = new LuceneDictionary(indexReader, termSourceField);
        spellChecker.indexDictionary(dictionary);
-        spellChecker.setSpellIndex(FSDirectory.getDirectory(spellcheckerIndexDir));
+        reopen();
+    }
+  
+    /**
+     * Reopens the SpellChecker index directory.
+     * Useful if an external process is responsible for building
+     * the spell checker index.
+     */
+    private void reopen() throws IOException {
+        spellChecker.setSpellIndex(spellcheckerIndexDir);
    }

    //////////////////////// SolrInfoMBeans methods //////////////////////
@ -123,7 +172,7 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
    }

    public String getDescription() {
-        return "The SpellChecker Solr request handler for SpellChecker index: " + spellcheckerIndexDir;
+      return "The SpellChecker Solr request handler for SpellChecker index: " + dirDescription;
    }

    public String getSourceId() {