mirror of https://github.com/apache/lucene.git
SOLR-81: more spellchecker enhancements, cleaned up schema/solrconfig, added support for relative paths as well as RAM based spellchecker directories, fixed NPE when using cmd=rebuild, added cmd=reopen
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@522628 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
876dd28846
commit
d50b65bc82
|
@ -128,6 +128,10 @@ New Features
|
|||
17. SOLR-182: allow lazy loading of request handlers on first request.
|
||||
(Ryan McKinley via yonik)
|
||||
|
||||
18. SOLR-81: More SpellCheckerRequestHandler enhancements, inlcluding
|
||||
support for relative or absolute directory path configurations, as
|
||||
well as RAM based directory. (hossman)
|
||||
|
||||
Changes in runtime behavior
|
||||
1. Highlighting using DisMax will only pick up terms from the main
|
||||
user query, not boost or filter queries (klaas).
|
||||
|
|
|
@ -252,6 +252,14 @@
|
|||
<field name="popularity" type="sint" indexed="true" stored="true" default="0"/>
|
||||
<field name="inStock" type="boolean" indexed="true" stored="true"/>
|
||||
|
||||
<!-- Some sample docs exists solely to demonstrate the spellchecker
|
||||
functionality, this is the only field they container.
|
||||
Typically you might build the spellchecker of "catchall" type field
|
||||
containing all of the text in each document
|
||||
-->
|
||||
<field name="word" type="string" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<!-- catchall field, containing all other searchable text fields (implemented
|
||||
via copyField further on in this schema -->
|
||||
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
|
|
|
@ -353,11 +353,20 @@
|
|||
<lst name="defaults">
|
||||
<int name="suggestionCount">1</int>
|
||||
<float name="accuracy">0.5</float>
|
||||
<str name="version">1.0</str>
|
||||
</lst>
|
||||
<!-- main init params for handler -->
|
||||
<str name="spellcheckerIndexDir">/home/otis/dev/repos/lucene/solr/trunk/example/solr/data/index</str>
|
||||
|
||||
<!-- Main init params for handler -->
|
||||
|
||||
<!-- The directory where your SpellChecker Index should live. -->
|
||||
<!-- May by absolute, or relative to the Solr "dataDir" directory. -->
|
||||
<!-- If this option is not specified, a ram directory will be used -->
|
||||
<str name="spellcheckerIndexDir">spell</str>
|
||||
|
||||
<!-- the field in your schema that you want to be able to build -->
|
||||
<!-- your spell index on. This should be a field that uses a very -->
|
||||
<!-- simple FieldType without a lot of Analysis (ie: string) -->
|
||||
<str name="termSourceField">word</str>
|
||||
|
||||
</requestHandler>
|
||||
|
||||
|
||||
|
|
|
@ -22,15 +22,21 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.spell.LuceneDictionary;
|
||||
import org.apache.lucene.search.spell.SpellChecker;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.solr.request.SolrParams;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryResponse;
|
||||
import org.apache.solr.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.Arrays;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/**
|
||||
* Takes a string (e.g. a query string) as the value of the "q" parameter
|
||||
|
@ -42,8 +48,10 @@ import java.util.Arrays;
|
|||
*/
|
||||
public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
||||
|
||||
private static Logger log = Logger.getLogger(SpellCheckerRequestHandler.class.getName());
|
||||
|
||||
private SpellChecker spellChecker;
|
||||
|
||||
|
||||
/*
|
||||
* From http://wiki.apache.org/jakarta-lucene/SpellChecker
|
||||
* If reader and restrictToField are both not null:
|
||||
|
@ -57,11 +65,12 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
|||
* return only the words more frequent than this.
|
||||
*
|
||||
*/
|
||||
private static IndexReader reader = null;
|
||||
private static IndexReader nullReader = null;
|
||||
private String restrictToField = null;
|
||||
private boolean onlyMorePopular = false;
|
||||
|
||||
private String spellcheckerIndexDir;
|
||||
private Directory spellcheckerIndexDir = new RAMDirectory();
|
||||
private String dirDescription = "(ramdir)";
|
||||
private String termSourceField;
|
||||
private static final float DEFAULT_ACCURACY = 0.5f;
|
||||
private static final int DEFAULT_NUM_SUGGESTIONS = 1;
|
||||
|
@ -70,11 +79,23 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
|||
super.init(args);
|
||||
SolrParams p = SolrParams.toSolrParams(args);
|
||||
termSourceField = p.get("termSourceField");
|
||||
spellcheckerIndexDir = p.get("spellcheckerIndexDir");
|
||||
|
||||
try {
|
||||
spellChecker = new SpellChecker(FSDirectory.getDirectory(spellcheckerIndexDir));
|
||||
String dir = p.get("spellcheckerIndexDir");
|
||||
if (null != dir) {
|
||||
File f = new File(dir);
|
||||
if ( ! f.isAbsolute() ) {
|
||||
f = new File(SolrCore.getSolrCore().getDataDir(), dir);
|
||||
}
|
||||
dirDescription = f.getAbsolutePath();
|
||||
log.info("using spell directory: " + dirDescription);
|
||||
spellcheckerIndexDir = FSDirectory.getDirectory(f);
|
||||
} else {
|
||||
log.info("using RAM based spell directory");
|
||||
}
|
||||
spellChecker = new SpellChecker(spellcheckerIndexDir);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Cannot open SpellChecker index", e);
|
||||
throw new RuntimeException("Cannot open SpellChecker index", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,8 +104,18 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
|||
SolrParams p = req.getParams();
|
||||
String words = p.get("q");
|
||||
String cmd = p.get("cmd");
|
||||
if (cmd != null && cmd.equals("rebuild"))
|
||||
if (cmd != null) {
|
||||
cmd = cmd.trim();
|
||||
if (cmd.equals("rebuild")) {
|
||||
rebuild(req);
|
||||
rsp.add("cmdExecuted","rebuild");
|
||||
} else if (cmd.equals("reopen")) {
|
||||
reopen();
|
||||
rsp.add("cmdExecuted","reopen");
|
||||
} else {
|
||||
throw new SolrException(400, "Unrecognized Command: " + cmd);
|
||||
}
|
||||
}
|
||||
|
||||
Float accuracy;
|
||||
int numSug;
|
||||
|
@ -100,20 +131,38 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
|||
throw new RuntimeException("Spelling suggestion count must be a valid positive integer", e);
|
||||
}
|
||||
|
||||
String[] suggestions = spellChecker.suggestSimilar(words, numSug,
|
||||
reader, restrictToField, onlyMorePopular);
|
||||
|
||||
rsp.add("suggestions", Arrays.asList(suggestions));
|
||||
if (null != words && !"".equals(words.trim())) {
|
||||
String[] suggestions =
|
||||
spellChecker.suggestSimilar(words, numSug,
|
||||
nullReader, restrictToField,
|
||||
onlyMorePopular);
|
||||
|
||||
rsp.add("suggestions", Arrays.asList(suggestions));
|
||||
}
|
||||
}
|
||||
|
||||
/** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the
|
||||
* index pointed to by the current {@link IndexSearcher}.
|
||||
*/
|
||||
private void rebuild(SolrQueryRequest req) throws IOException {
|
||||
private void rebuild(SolrQueryRequest req) throws IOException, SolrException {
|
||||
if (null == termSourceField) {
|
||||
throw new SolrException
|
||||
(500, "can't rebuild spellchecker index without termSourceField configured");
|
||||
}
|
||||
|
||||
IndexReader indexReader = req.getSearcher().getReader();
|
||||
Dictionary dictionary = new LuceneDictionary(indexReader, termSourceField);
|
||||
spellChecker.indexDictionary(dictionary);
|
||||
spellChecker.setSpellIndex(FSDirectory.getDirectory(spellcheckerIndexDir));
|
||||
reopen();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reopens the SpellChecker index directory.
|
||||
* Useful if an external process is responsible for building
|
||||
* the spell checker index.
|
||||
*/
|
||||
private void reopen() throws IOException {
|
||||
spellChecker.setSpellIndex(spellcheckerIndexDir);
|
||||
}
|
||||
|
||||
//////////////////////// SolrInfoMBeans methods //////////////////////
|
||||
|
@ -123,7 +172,7 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
|||
}
|
||||
|
||||
public String getDescription() {
|
||||
return "The SpellChecker Solr request handler for SpellChecker index: " + spellcheckerIndexDir;
|
||||
return "The SpellChecker Solr request handler for SpellChecker index: " + dirDescription;
|
||||
}
|
||||
|
||||
public String getSourceId() {
|
||||
|
|
Loading…
Reference in New Issue