mirror of https://github.com/apache/lucene.git
SOLR-81: more spellchecker enhancements, cleaned up schema/solrconfig, added support for relative paths as well as RAM based spellchecker directories, fixed NPE when using cmd=rebuild, added cmd=reopen
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@522628 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
876dd28846
commit
d50b65bc82
|
@ -128,6 +128,10 @@ New Features
|
||||||
17. SOLR-182: allow lazy loading of request handlers on first request.
|
17. SOLR-182: allow lazy loading of request handlers on first request.
|
||||||
(Ryan McKinley via yonik)
|
(Ryan McKinley via yonik)
|
||||||
|
|
||||||
|
18. SOLR-81: More SpellCheckerRequestHandler enhancements, including
|
||||||
|
support for relative or absolute directory path configurations, as
|
||||||
|
well as a RAM-based directory. (hossman)
|
||||||
|
|
||||||
Changes in runtime behavior
|
Changes in runtime behavior
|
||||||
1. Highlighting using DisMax will only pick up terms from the main
|
1. Highlighting using DisMax will only pick up terms from the main
|
||||||
user query, not boost or filter queries (klaas).
|
user query, not boost or filter queries (klaas).
|
||||||
|
|
|
@ -252,6 +252,14 @@
|
||||||
<field name="popularity" type="sint" indexed="true" stored="true" default="0"/>
|
<field name="popularity" type="sint" indexed="true" stored="true" default="0"/>
|
||||||
<field name="inStock" type="boolean" indexed="true" stored="true"/>
|
<field name="inStock" type="boolean" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<!-- Some sample docs exist solely to demonstrate the spellchecker
|
||||||
|
functionality; this is the only field they contain.
|
||||||
|
Typically you might build the spellchecker off of a "catchall" type field
|
||||||
|
containing all of the text in each document
|
||||||
|
-->
|
||||||
|
<field name="word" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
|
||||||
<!-- catchall field, containing all other searchable text fields (implemented
|
<!-- catchall field, containing all other searchable text fields (implemented
|
||||||
via copyField further on in this schema -->
|
via copyField further on in this schema -->
|
||||||
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
|
|
@ -353,11 +353,20 @@
|
||||||
<lst name="defaults">
|
<lst name="defaults">
|
||||||
<int name="suggestionCount">1</int>
|
<int name="suggestionCount">1</int>
|
||||||
<float name="accuracy">0.5</float>
|
<float name="accuracy">0.5</float>
|
||||||
<str name="version">1.0</str>
|
|
||||||
</lst>
|
</lst>
|
||||||
<!-- main init params for handler -->
|
|
||||||
<str name="spellcheckerIndexDir">/home/otis/dev/repos/lucene/solr/trunk/example/solr/data/index</str>
|
<!-- Main init params for handler -->
|
||||||
|
|
||||||
|
<!-- The directory where your SpellChecker Index should live. -->
|
||||||
|
<!-- May be absolute, or relative to the Solr "dataDir" directory. -->
|
||||||
|
<!-- If this option is not specified, a RAM directory will be used -->
|
||||||
|
<str name="spellcheckerIndexDir">spell</str>
|
||||||
|
|
||||||
|
<!-- the field in your schema that you want to be able to build -->
|
||||||
|
<!-- your spell index on. This should be a field that uses a very -->
|
||||||
|
<!-- simple FieldType without a lot of Analysis (ie: string) -->
|
||||||
<str name="termSourceField">word</str>
|
<str name="termSourceField">word</str>
|
||||||
|
|
||||||
</requestHandler>
|
</requestHandler>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -22,15 +22,21 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.spell.Dictionary;
|
import org.apache.lucene.search.spell.Dictionary;
|
||||||
import org.apache.lucene.search.spell.LuceneDictionary;
|
import org.apache.lucene.search.spell.LuceneDictionary;
|
||||||
import org.apache.lucene.search.spell.SpellChecker;
|
import org.apache.lucene.search.spell.SpellChecker;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.solr.request.SolrParams;
|
import org.apache.solr.request.SolrParams;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.request.SolrQueryResponse;
|
import org.apache.solr.request.SolrQueryResponse;
|
||||||
import org.apache.solr.util.NamedList;
|
import org.apache.solr.util.NamedList;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.core.SolrException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes a string (e.g. a query string) as the value of the "q" parameter
|
* Takes a string (e.g. a query string) as the value of the "q" parameter
|
||||||
|
@ -42,8 +48,10 @@ import java.util.Arrays;
|
||||||
*/
|
*/
|
||||||
public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
||||||
|
|
||||||
|
private static Logger log = Logger.getLogger(SpellCheckerRequestHandler.class.getName());
|
||||||
|
|
||||||
private SpellChecker spellChecker;
|
private SpellChecker spellChecker;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* From http://wiki.apache.org/jakarta-lucene/SpellChecker
|
* From http://wiki.apache.org/jakarta-lucene/SpellChecker
|
||||||
* If reader and restrictToField are both not null:
|
* If reader and restrictToField are both not null:
|
||||||
|
@ -57,11 +65,12 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
||||||
* return only the words more frequent than this.
|
* return only the words more frequent than this.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private static IndexReader reader = null;
|
private static IndexReader nullReader = null;
|
||||||
private String restrictToField = null;
|
private String restrictToField = null;
|
||||||
private boolean onlyMorePopular = false;
|
private boolean onlyMorePopular = false;
|
||||||
|
|
||||||
private String spellcheckerIndexDir;
|
private Directory spellcheckerIndexDir = new RAMDirectory();
|
||||||
|
private String dirDescription = "(ramdir)";
|
||||||
private String termSourceField;
|
private String termSourceField;
|
||||||
private static final float DEFAULT_ACCURACY = 0.5f;
|
private static final float DEFAULT_ACCURACY = 0.5f;
|
||||||
private static final int DEFAULT_NUM_SUGGESTIONS = 1;
|
private static final int DEFAULT_NUM_SUGGESTIONS = 1;
|
||||||
|
@ -70,11 +79,23 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
||||||
super.init(args);
|
super.init(args);
|
||||||
SolrParams p = SolrParams.toSolrParams(args);
|
SolrParams p = SolrParams.toSolrParams(args);
|
||||||
termSourceField = p.get("termSourceField");
|
termSourceField = p.get("termSourceField");
|
||||||
spellcheckerIndexDir = p.get("spellcheckerIndexDir");
|
|
||||||
try {
|
try {
|
||||||
spellChecker = new SpellChecker(FSDirectory.getDirectory(spellcheckerIndexDir));
|
String dir = p.get("spellcheckerIndexDir");
|
||||||
|
if (null != dir) {
|
||||||
|
File f = new File(dir);
|
||||||
|
if ( ! f.isAbsolute() ) {
|
||||||
|
f = new File(SolrCore.getSolrCore().getDataDir(), dir);
|
||||||
|
}
|
||||||
|
dirDescription = f.getAbsolutePath();
|
||||||
|
log.info("using spell directory: " + dirDescription);
|
||||||
|
spellcheckerIndexDir = FSDirectory.getDirectory(f);
|
||||||
|
} else {
|
||||||
|
log.info("using RAM based spell directory");
|
||||||
|
}
|
||||||
|
spellChecker = new SpellChecker(spellcheckerIndexDir);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Cannot open SpellChecker index", e);
|
throw new RuntimeException("Cannot open SpellChecker index", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,8 +104,18 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
||||||
SolrParams p = req.getParams();
|
SolrParams p = req.getParams();
|
||||||
String words = p.get("q");
|
String words = p.get("q");
|
||||||
String cmd = p.get("cmd");
|
String cmd = p.get("cmd");
|
||||||
if (cmd != null && cmd.equals("rebuild"))
|
if (cmd != null) {
|
||||||
|
cmd = cmd.trim();
|
||||||
|
if (cmd.equals("rebuild")) {
|
||||||
rebuild(req);
|
rebuild(req);
|
||||||
|
rsp.add("cmdExecuted","rebuild");
|
||||||
|
} else if (cmd.equals("reopen")) {
|
||||||
|
reopen();
|
||||||
|
rsp.add("cmdExecuted","reopen");
|
||||||
|
} else {
|
||||||
|
throw new SolrException(400, "Unrecognized Command: " + cmd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Float accuracy;
|
Float accuracy;
|
||||||
int numSug;
|
int numSug;
|
||||||
|
@ -100,20 +131,38 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
||||||
throw new RuntimeException("Spelling suggestion count must be a valid positive integer", e);
|
throw new RuntimeException("Spelling suggestion count must be a valid positive integer", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] suggestions = spellChecker.suggestSimilar(words, numSug,
|
if (null != words && !"".equals(words.trim())) {
|
||||||
reader, restrictToField, onlyMorePopular);
|
String[] suggestions =
|
||||||
|
spellChecker.suggestSimilar(words, numSug,
|
||||||
rsp.add("suggestions", Arrays.asList(suggestions));
|
nullReader, restrictToField,
|
||||||
|
onlyMorePopular);
|
||||||
|
|
||||||
|
rsp.add("suggestions", Arrays.asList(suggestions));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the
|
/** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the
|
||||||
* index pointed to by the current {@link IndexSearcher}.
|
* index pointed to by the current {@link IndexSearcher}.
|
||||||
*/
|
*/
|
||||||
private void rebuild(SolrQueryRequest req) throws IOException {
|
private void rebuild(SolrQueryRequest req) throws IOException, SolrException {
|
||||||
|
if (null == termSourceField) {
|
||||||
|
throw new SolrException
|
||||||
|
(500, "can't rebuild spellchecker index without termSourceField configured");
|
||||||
|
}
|
||||||
|
|
||||||
IndexReader indexReader = req.getSearcher().getReader();
|
IndexReader indexReader = req.getSearcher().getReader();
|
||||||
Dictionary dictionary = new LuceneDictionary(indexReader, termSourceField);
|
Dictionary dictionary = new LuceneDictionary(indexReader, termSourceField);
|
||||||
spellChecker.indexDictionary(dictionary);
|
spellChecker.indexDictionary(dictionary);
|
||||||
spellChecker.setSpellIndex(FSDirectory.getDirectory(spellcheckerIndexDir));
|
reopen();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reopens the SpellChecker index directory.
|
||||||
|
* Useful if an external process is responsible for building
|
||||||
|
* the spell checker index.
|
||||||
|
*/
|
||||||
|
private void reopen() throws IOException {
|
||||||
|
spellChecker.setSpellIndex(spellcheckerIndexDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////// SolrInfoMBeans methods //////////////////////
|
//////////////////////// SolrInfoMBeans methods //////////////////////
|
||||||
|
@ -123,7 +172,7 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getDescription() {
|
public String getDescription() {
|
||||||
return "The SpellChecker Solr request handler for SpellChecker index: " + spellcheckerIndexDir;
|
return "The SpellChecker Solr request handler for SpellChecker index: " + dirDescription;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getSourceId() {
|
public String getSourceId() {
|
||||||
|
|
Loading…
Reference in New Issue