SOLR-81: more spellchecker enhancements, cleaned up schema/solrconfig, added support for relative paths as well as RAM based spellchecker directories, fixed NPE when using cmd=rebuild, added cmd=reopen

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@522628 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2007-03-26 21:11:13 +00:00
parent 876dd28846
commit d50b65bc82
4 changed files with 87 additions and 17 deletions

View File

@ -128,6 +128,10 @@ New Features
17. SOLR-182: allow lazy loading of request handlers on first request.
(Ryan McKinley via yonik)
18. SOLR-81: More SpellCheckerRequestHandler enhancements, inlcluding
support for relative or absolute directory path configurations, as
well as RAM based directory. (hossman)
Changes in runtime behavior
1. Highlighting using DisMax will only pick up terms from the main
user query, not boost or filter queries (klaas).

View File

@ -252,6 +252,14 @@
<field name="popularity" type="sint" indexed="true" stored="true" default="0"/>
<field name="inStock" type="boolean" indexed="true" stored="true"/>
<!-- Some sample docs exists solely to demonstrate the spellchecker
functionality, this is the only field they container.
Typically you might build the spellchecker of "catchall" type field
containing all of the text in each document
-->
<field name="word" type="string" indexed="true" stored="true"/>
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>

View File

@ -353,11 +353,20 @@
<lst name="defaults">
<int name="suggestionCount">1</int>
<float name="accuracy">0.5</float>
<str name="version">1.0</str>
</lst>
<!-- main init params for handler -->
<str name="spellcheckerIndexDir">/home/otis/dev/repos/lucene/solr/trunk/example/solr/data/index</str>
<!-- Main init params for handler -->
<!-- The directory where your SpellChecker Index should live. -->
<!-- May by absolute, or relative to the Solr "dataDir" directory. -->
<!-- If this option is not specified, a ram directory will be used -->
<str name="spellcheckerIndexDir">spell</str>
<!-- the field in your schema that you want to be able to build -->
<!-- your spell index on. This should be a field that uses a very -->
<!-- simple FieldType without a lot of Analysis (ie: string) -->
<str name="termSourceField">word</str>
</requestHandler>

View File

@ -22,15 +22,21 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.solr.request.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.logging.Logger;
/**
* Takes a string (e.g. a query string) as the value of the "q" parameter
@ -42,6 +48,8 @@ import java.util.Arrays;
*/
public class SpellCheckerRequestHandler extends RequestHandlerBase {
private static Logger log = Logger.getLogger(SpellCheckerRequestHandler.class.getName());
private SpellChecker spellChecker;
/*
@ -57,11 +65,12 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
* return only the words more frequent than this.
*
*/
private static IndexReader reader = null;
private static IndexReader nullReader = null;
private String restrictToField = null;
private boolean onlyMorePopular = false;
private String spellcheckerIndexDir;
private Directory spellcheckerIndexDir = new RAMDirectory();
private String dirDescription = "(ramdir)";
private String termSourceField;
private static final float DEFAULT_ACCURACY = 0.5f;
private static final int DEFAULT_NUM_SUGGESTIONS = 1;
@ -70,11 +79,23 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
super.init(args);
SolrParams p = SolrParams.toSolrParams(args);
termSourceField = p.get("termSourceField");
spellcheckerIndexDir = p.get("spellcheckerIndexDir");
try {
spellChecker = new SpellChecker(FSDirectory.getDirectory(spellcheckerIndexDir));
String dir = p.get("spellcheckerIndexDir");
if (null != dir) {
File f = new File(dir);
if ( ! f.isAbsolute() ) {
f = new File(SolrCore.getSolrCore().getDataDir(), dir);
}
dirDescription = f.getAbsolutePath();
log.info("using spell directory: " + dirDescription);
spellcheckerIndexDir = FSDirectory.getDirectory(f);
} else {
log.info("using RAM based spell directory");
}
spellChecker = new SpellChecker(spellcheckerIndexDir);
} catch (IOException e) {
throw new RuntimeException("Cannot open SpellChecker index", e);
throw new RuntimeException("Cannot open SpellChecker index", e);
}
}
@ -83,8 +104,18 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
SolrParams p = req.getParams();
String words = p.get("q");
String cmd = p.get("cmd");
if (cmd != null && cmd.equals("rebuild"))
if (cmd != null) {
cmd = cmd.trim();
if (cmd.equals("rebuild")) {
rebuild(req);
rsp.add("cmdExecuted","rebuild");
} else if (cmd.equals("reopen")) {
reopen();
rsp.add("cmdExecuted","reopen");
} else {
throw new SolrException(400, "Unrecognized Command: " + cmd);
}
}
Float accuracy;
int numSug;
@ -100,20 +131,38 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
throw new RuntimeException("Spelling suggestion count must be a valid positive integer", e);
}
String[] suggestions = spellChecker.suggestSimilar(words, numSug,
reader, restrictToField, onlyMorePopular);
if (null != words && !"".equals(words.trim())) {
String[] suggestions =
spellChecker.suggestSimilar(words, numSug,
nullReader, restrictToField,
onlyMorePopular);
rsp.add("suggestions", Arrays.asList(suggestions));
rsp.add("suggestions", Arrays.asList(suggestions));
}
}
/** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the
* index pointed to by the current {@link IndexSearcher}.
*/
private void rebuild(SolrQueryRequest req) throws IOException {
private void rebuild(SolrQueryRequest req) throws IOException, SolrException {
if (null == termSourceField) {
throw new SolrException
(500, "can't rebuild spellchecker index without termSourceField configured");
}
IndexReader indexReader = req.getSearcher().getReader();
Dictionary dictionary = new LuceneDictionary(indexReader, termSourceField);
spellChecker.indexDictionary(dictionary);
spellChecker.setSpellIndex(FSDirectory.getDirectory(spellcheckerIndexDir));
reopen();
}
/**
* Reopens the SpellChecker index directory.
* Useful if an external process is responsible for building
* the spell checker index.
*/
private void reopen() throws IOException {
spellChecker.setSpellIndex(spellcheckerIndexDir);
}
//////////////////////// SolrInfoMBeans methods //////////////////////
@ -123,7 +172,7 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
}
public String getDescription() {
return "The SpellChecker Solr request handler for SpellChecker index: " + spellcheckerIndexDir;
return "The SpellChecker Solr request handler for SpellChecker index: " + dirDescription;
}
public String getSourceId() {