mirror of https://github.com/apache/lucene.git

SOLR-395: spell checker upgrade

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@592129 13f79535-47bb-0310-9956-ffa450edef68

parent 009a33ce22
commit 4b3ae817b7

CHANGES.txt | 10
@@ -136,13 +136,19 @@ New Features
     to the detailed field information from the solrj client API.
     (Grant Ingersoll via ehatcher)

-26. SOLR-334L Pluggable query parsers. Allows specification of query
+26. SOLR-334: Pluggable query parsers. Allows specification of query
     type and arguments as a prefix on a query string. (yonik)

-27. SOLR-351L External Value Source. An external file may be used
+27. SOLR-351: External Value Source. An external file may be used
     to specify the values of a field, currently usable as
     a ValueSource in a FunctionQuery. (yonik)

+28. SOLR-395: Many new features for the spell checker implementation, including
+    an extended response mode with much richer output, multi-word spell checking,
+    and a bevy of new and renamed options (see the wiki).
+    (Mike Krimerman, Scott Taber via klaas).
+
 Changes in runtime behavior

 Optimizations
@@ -18,6 +18,7 @@
 package org.apache.solr.handler;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.LuceneDictionary;

@@ -30,7 +31,9 @@ import org.apache.solr.request.SolrQueryResponse;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.util.HiFrequencyDictionary;

 import java.io.File;
 import java.io.IOException;

@@ -42,6 +45,141 @@ import java.util.logging.Logger;
 * Takes a string (e.g. a query string) as the value of the "q" parameter
 * and looks up alternative spelling suggestions in the spellchecker.
 * The spellchecker used by this handler is the Lucene contrib SpellChecker.
 *
<style>
 pre.code
 {
  border: 1pt solid #AEBDCC;
  background-color: #F3F5F7;
  padding: 5pt;
  font-family: courier, monospace;
  white-space: pre;
  // begin css 3 or browser specific rules - do not remove!
  // see: http://forums.techguy.org/archive/index.php/t-249849.html
  white-space: pre-wrap;
  word-wrap: break-word;
  white-space: -moz-pre-wrap;
  white-space: -pre-wrap;
  white-space: -o-pre-wrap;
  // end css 3 or browser specific rules
 }
</style>
 *
 * <p>The response identifies the original word by echoing it back as an entry
 * named "words" whose value is the original word. It
 * also indicates whether the requested word is contained in the index through
 * the "exist" true/false value. Examples of these output
 * parameters in the standard output format are as follows:</p>
 * <pre class="code">
   <str name="words">facial</str>
   <str name="exist">true</str> </pre>
 *
 * <p>If the query string parameter "extendedResults" is used, then each word within the
 * "q" parameter (separated by a space or +) will
 * be run through the spell checker and wrapped in a
 * NamedList. Each word then gets its own set of results: its frequency in the
 * index and its suggestions.</p>
 *
 * <p>Examples of the standard output (XML) without and with the
 * use of the "extendedResults" parameter are as follows.</p>
 *
 * <p>The following URL
 * examples were configured with the solr.SpellCheckerRequestHandler
 * named as "/spellchecker".</p>
 *
 * <p>Without the use of "extendedResults" and one word
 * spelled correctly: facial</p>
 * <pre class="code">http://.../spellchecker?indent=on&onlyMorePopular=true&accuracy=.6&suggestionCount=20&q=facial</pre>
 * <pre class="code">
 <?xml version="1.0" encoding="UTF-8"?>
 <response>

 <lst name="responseHeader">
  <int name="status">0</int>
  <int name="QTime">6</int>
 </lst>
 <str name="words">facial</str>
 <str name="exist">true</str>
 <arr name="suggestions">
  <str>faciale</str>
  <str>faucial</str>
  <str>fascial</str>
  <str>facing</str>
  <str>faciei</str>
  <str>facialis</str>
  <str>social</str>
  <str>facile</str>
  <str>spacial</str>
  <str>glacial</str>
  <str>marcial</str>
  <str>facies</str>
  <str>facio</str>
 </arr>
 </response> </pre>
 *
 * <p>Without the use of "extendedResults" and two words,
 * one spelled correctly and one misspelled: facial salophosphoprotein</p>
 * <pre class="code">http://.../spellchecker?indent=on&onlyMorePopular=true&accuracy=.6&suggestionCount=20&q=facial+salophosphoprotein</pre>
 * <pre class="code">
 <?xml version="1.0" encoding="UTF-8"?>
 <response>

 <lst name="responseHeader">
  <int name="status">0</int>
  <int name="QTime">18</int>
 </lst>
 <str name="words">facial salophosphoprotein</str>
 <str name="exist">false</str>
 <arr name="suggestions">
  <str>sialophosphoprotein</str>
 </arr>
 </response> </pre>
 *
 * <p>With the use of "extendedResults" and two words,
 * one spelled correctly and one misspelled: facial salophosphoprotein</p>
 * <pre class="code">http://.../spellchecker?indent=on&onlyMorePopular=true&accuracy=.6&suggestionCount=20&extendedResults=true&q=facial+salophosphoprotein</pre>
 * <pre class="code">
 <?xml version="1.0" encoding="UTF-8"?>
 <response>

 <lst name="responseHeader">
  <int name="status">0</int>
  <int name="QTime">23</int>
 </lst>
 <lst name="result">
  <lst name="facial">
   <int name="frequency">1</int>
   <lst name="suggestions">
    <lst name="faciale"><int name="frequency">1</int></lst>
    <lst name="faucial"><int name="frequency">1</int></lst>
    <lst name="fascial"><int name="frequency">1</int></lst>
    <lst name="facing"><int name="frequency">1</int></lst>
    <lst name="faciei"><int name="frequency">1</int></lst>
    <lst name="facialis"><int name="frequency">1</int></lst>
    <lst name="social"><int name="frequency">1</int></lst>
    <lst name="facile"><int name="frequency">1</int></lst>
    <lst name="spacial"><int name="frequency">1</int></lst>
    <lst name="glacial"><int name="frequency">1</int></lst>
    <lst name="marcial"><int name="frequency">1</int></lst>
    <lst name="facies"><int name="frequency">1</int></lst>
    <lst name="facio"><int name="frequency">1</int></lst>
   </lst>
  </lst>
  <lst name="salophosphoprotein">
   <int name="frequency">0</int>
   <lst name="suggestions">
    <lst name="sialophosphoprotein"><int name="frequency">1</int></lst>
    <lst name="phosphoprotein"><int name="frequency">1</int></lst>
    <lst name="phosphoproteins"><int name="frequency">1</int></lst>
    <lst name="alphalipoprotein"><int name="frequency">1</int></lst>
   </lst>
  </lst>
 </lst>
 </response> </pre>
 *
 * @see <a href="http://wiki.apache.org/jakarta-lucene/SpellChecker">The Lucene Spellchecker documentation</a>
 *
 */
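The class documentation above describes the request parameters and the XML that comes back. As a rough illustration of how a client might call the handler over HTTP, here is a small, self-contained Java sketch using only JDK classes; the host, port and handler path are assumptions for the example (they depend on how and where the handler is deployed), not part of this commit.

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;
    import java.net.URLEncoder;

    public class SpellCheckerClientSketch {
      public static void main(String[] args) throws Exception {
        // Assumed local Solr instance with the handler registered as "spellchecker".
        String base = "http://localhost:8983/solr/spellchecker";
        String query = URLEncoder.encode("facial salophosphoprotein", "UTF-8");

        // Parameters as described in the javadoc above; extendedResults switches on
        // the per-word frequency/suggestions output.
        URL url = new URL(base
            + "?q=" + query
            + "&suggestionCount=20&accuracy=.6&onlyMorePopular=true&extendedResults=true");

        // Print the raw XML response, which has the shape shown in the examples above.
        BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), "UTF-8"));
        String line;
        while ((line = in.readLine()) != null) {
          System.out.println(line);
        }
        in.close();
      }
    }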
@@ -64,22 +202,37 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
   * return only the words more frequent than this.
   *
   */
  private boolean onlyMorePopular = false;

  private Directory spellcheckerIndexDir = new RAMDirectory();
  private String dirDescription = "(ramdir)";
  private String termSourceField;

  private static final String PREFIX = "sp.";
  private static final String QUERY_PREFIX = PREFIX + "query.";
  private static final String DICTIONARY_PREFIX = PREFIX + "dictionary.";

  private static final String SOURCE_FIELD = DICTIONARY_PREFIX + "termSourceField";
  private static final String INDEX_DIR = DICTIONARY_PREFIX + "indexDir";
  private static final String THRESHOLD = DICTIONARY_PREFIX + "threshold";

  private static final String ACCURACY = QUERY_PREFIX + "accuracy";
  private static final String SUGGESTIONS = QUERY_PREFIX + "suggestionCount";
  private static final String POPULAR = QUERY_PREFIX + "onlyMorePopular";
  private static final String EXTENDED = QUERY_PREFIX + "extendedResults";

  private static final float DEFAULT_ACCURACY = 0.5f;
-  private static final int DEFAULT_NUM_SUGGESTIONS = 1;
+  private static final int DEFAULT_SUGGESTION_COUNT = 1;
  private static final boolean DEFAULT_MORE_POPULAR = false;
  private static final boolean DEFAULT_EXTENDED_RESULTS = false;
  private static final float DEFAULT_DICTIONARY_THRESHOLD = 0.0f;

  public void init(NamedList args) {
    super.init(args);
    SolrParams p = SolrParams.toSolrParams(args);
-    termSourceField = p.get("termSourceField");
+    termSourceField = p.get(SOURCE_FIELD, p.get("termSourceField"));

    try {
-      String dir = p.get("spellcheckerIndexDir");
+      String dir = p.get(INDEX_DIR, p.get("spellcheckerIndexDir"));
      if (null != dir) {
        File f = new File(dir);
        if ( ! f.isAbsolute() ) {
@@ -97,6 +250,10 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
    }
  }

  /**
   * Processes the following query string parameters: q, extendedResults,
   * cmd (rebuild or reopen), accuracy, suggestionCount, restrictToField, and onlyMorePopular.
   */
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
    throws Exception {
    SolrParams p = req.getParams();
@@ -115,36 +272,78 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
      }
    }

    // empty query string
    if (null == words || "".equals(words.trim())) {
      return;
    }

    IndexReader indexReader = null;
    String suggestionField = null;
    Float accuracy;
    int numSug;
    boolean onlyMorePopular;
    boolean extendedResults;
    try {
-      accuracy = p.getFloat("accuracy", DEFAULT_ACCURACY);
+      accuracy = p.getFloat(ACCURACY, p.getFloat("accuracy", DEFAULT_ACCURACY));
      spellChecker.setAccuracy(accuracy);
    } catch (NumberFormatException e) {
      throw new RuntimeException("Accuracy must be a valid positive float", e);
    }
    try {
-      numSug = p.getInt("suggestionCount", DEFAULT_NUM_SUGGESTIONS);
+      numSug = p.getInt(SUGGESTIONS, p.getInt("suggestionCount", DEFAULT_SUGGESTION_COUNT));
    } catch (NumberFormatException e) {
      throw new RuntimeException("Spelling suggestion count must be a valid positive integer", e);
    }
    try {
-      onlyMorePopular = p.getBool("onlyMorePopular", DEFAULT_MORE_POPULAR);
-    } catch (NumberFormatException e) {
+      onlyMorePopular = p.getBool(POPULAR, DEFAULT_MORE_POPULAR);
+    } catch (SolrException e) {
      throw new RuntimeException("'Only more popular' must be a valid boolean", e);
    }
    try {
      extendedResults = p.getBool(EXTENDED, DEFAULT_EXTENDED_RESULTS);
    } catch (SolrException e) {
      throw new RuntimeException("'Extended results' must be a valid boolean", e);
    }

    // when searching for more popular, a non-null index reader and
    // restricted field are required
-    if (onlyMorePopular) {
+    if (onlyMorePopular || extendedResults) {
      indexReader = req.getSearcher().getReader();
      suggestionField = termSourceField;
    }

    if (extendedResults) {

      if (null != words && !"".equals(words.trim())) {
        SimpleOrderedMap<Object> results = new SimpleOrderedMap<Object>();
        String[] wordz = words.split(" ");
        for (String word : wordz)
        {
          SimpleOrderedMap<Object> nl = new SimpleOrderedMap<Object>();
          nl.add("frequency", indexReader.docFreq(new Term(suggestionField, word)));
          String[] suggestions =
            spellChecker.suggestSimilar(word, numSug,
                                        indexReader, suggestionField, onlyMorePopular);

          // suggestion array
          NamedList<Object> sa = new NamedList<Object>();
          for (int i=0; i<suggestions.length; i++) {
            // suggestion item
            SimpleOrderedMap<Object> si = new SimpleOrderedMap<Object>();
            si.add("frequency", indexReader.docFreq(new Term(termSourceField, suggestions[i])));
            sa.add(suggestions[i], si);
          }
          nl.add("suggestions", sa);
          results.add(word, nl);
        }
        rsp.add( "result", results );

      } else {
        rsp.add("words", words);
        if (spellChecker.exist(words)) {
          rsp.add("exist","true");
        } else {
          rsp.add("exist","false");
        }
        String[] suggestions =
          spellChecker.suggestSimilar(words, numSug,
                                      indexReader, suggestionField,
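The extendedResults branch above builds one SimpleOrderedMap per input word, holding a "frequency" entry plus a "suggestions" NamedList whose entries map each suggested word to its own frequency. Below is a minimal sketch of how a caller of the response object might walk that structure; the sample data is made up here purely to mirror the shape the handler produces, it is not part of this commit.

    import org.apache.solr.common.util.NamedList;
    import org.apache.solr.common.util.SimpleOrderedMap;

    public class ExtendedResultsWalkSketch {
      public static void main(String[] args) {
        // Build a tiny structure shaped like the handler's "result" output:
        // result -> word -> { frequency, suggestions -> { suggestion -> { frequency } } }
        SimpleOrderedMap<Object> suggestionItem = new SimpleOrderedMap<Object>();
        suggestionItem.add("frequency", 1);
        SimpleOrderedMap<Object> suggestions = new SimpleOrderedMap<Object>();
        suggestions.add("cart", suggestionItem);

        SimpleOrderedMap<Object> wordInfo = new SimpleOrderedMap<Object>();
        wordInfo.add("frequency", 1);
        wordInfo.add("suggestions", suggestions);

        SimpleOrderedMap<Object> result = new SimpleOrderedMap<Object>();
        result.add("cat", wordInfo);

        // Walk it the way a consumer of rsp.add("result", results) might.
        for (int i = 0; i < result.size(); i++) {
          String original = result.getName(i);
          NamedList info = (NamedList) result.getVal(i);
          System.out.println(original + " appears in " + info.get("frequency") + " doc(s)");

          NamedList sugg = (NamedList) info.get("suggestions");
          for (int j = 0; j < sugg.size(); j++) {
            NamedList item = (NamedList) sugg.getVal(j);
            System.out.println("  suggestion: " + sugg.getName(j)
                + " (frequency " + item.get("frequency") + ")");
          }
        }
      }
    }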
@@ -156,6 +355,7 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {

  /** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the
   * index pointed to by the current {@link IndexSearcher}.
   * Any word appearing in fewer than the threshold fraction of documents will not be added to the spellcheck index.
   */
  private void rebuild(SolrQueryRequest req) throws IOException, SolrException {
    if (null == termSourceField) {

@@ -163,8 +363,15 @@ public class SpellCheckerRequestHandler extends RequestHandlerBase {
        (SolrException.ErrorCode.SERVER_ERROR, "can't rebuild spellchecker index without termSourceField configured");
    }

    Float threshold;
    try {
      threshold = req.getParams().getFloat("sp.dictionary.threshold", DEFAULT_DICTIONARY_THRESHOLD);
    } catch (NumberFormatException e) {
      throw new RuntimeException("Threshold must be a valid positive float", e);
    }

    IndexReader indexReader = req.getSearcher().getReader();
-    Dictionary dictionary = new LuceneDictionary(indexReader, termSourceField);
+    Dictionary dictionary = new HiFrequencyDictionary(indexReader, termSourceField, threshold);
    spellChecker.clearIndex();
    spellChecker.indexDictionary(dictionary);
    reopen();
@@ -0,0 +1,140 @@
package org.apache.solr.util;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Iterator;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.spell.Dictionary;

/**
 * High-frequency dictionary: terms taken from the given field
 * of a Lucene index, which appear in a number of documents
 * above a given threshold.
 *
 * When using IndexReader.terms(Term), the code must not call next() on the
 * TermEnum as its first call; see: http://issues.apache.org/jira/browse/LUCENE-6
 *
 * Threshold is a value in [0..1] representing the minimum
 * fraction of the total documents in which a term must appear.
 *
 * @author Mike Krimerman
 *
 * Based on LuceneDictionary, by
 * @author Nicolas Maisonneuve
 * @author Christian Mallwitz
 */
public class HiFrequencyDictionary implements Dictionary {
  private IndexReader reader;
  private String field;
  private float thresh;

  public HiFrequencyDictionary(IndexReader reader, String field, float thresh) {
    this.reader = reader;
    this.field = field.intern();
    this.thresh = thresh;
  }

  public final Iterator getWordsIterator() {
    return new HiFrequencyIterator();
  }


  final class HiFrequencyIterator implements Iterator {
    private TermEnum termEnum;
    private Term actualTerm;
    private boolean hasNextCalled;
    private int minNumDocs;

    HiFrequencyIterator() {
      try {
        termEnum = reader.terms(new Term(field, ""));
        minNumDocs = (int)(thresh * (float)reader.numDocs());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    private boolean isFrequent(Term term) {
      try {
        return reader.docFreq(term) >= minNumDocs;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    public Object next() {
      if (!hasNextCalled) {
        hasNext();
      }
      hasNextCalled = false;

      try {
        termEnum.next();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }

      return (actualTerm != null) ? actualTerm.text() : null;
    }

    public boolean hasNext() {
      if (hasNextCalled) {
        return actualTerm != null;
      }
      hasNextCalled = true;

      do {
        actualTerm = termEnum.term();

        // if there are no more words, return false
        if (actualTerm == null) {
          return false;
        }

        String currentField = actualTerm.field();

        // if the next word doesn't have the same field, return false
        if (currentField != field) {
          actualTerm = null;
          return false;
        }

        // got a valid term; does it pass the threshold?
        if (isFrequent(actualTerm)) {
          return true;
        }

        // term not up to threshold
        try {
          termEnum.next();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }

      } while (true);
    }

    public void remove() {
      throw new UnsupportedOperationException();
    }
  }
}
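For context, here is a rough sketch of how this dictionary is meant to be used, mirroring what rebuild() above does: open an IndexReader over an existing index, wrap the source field in a HiFrequencyDictionary with a threshold, and feed it to the Lucene contrib SpellChecker. With threshold 0.2 and a 10-document index (as in the test case below), minNumDocs works out to (int)(0.2 * 10) = 2, so only terms appearing in at least 2 documents make it into the spelling index. The index path and field name in the sketch are assumptions for the example, not part of this commit.

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.spell.Dictionary;
    import org.apache.lucene.search.spell.SpellChecker;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.solr.util.HiFrequencyDictionary;

    public class HiFrequencyDictionarySketch {
      public static void main(String[] args) throws Exception {
        // Assumed location of an existing Lucene/Solr index and its source field.
        Directory mainIndex = FSDirectory.getDirectory("/path/to/solr/data/index");
        IndexReader reader = IndexReader.open(mainIndex);

        // Keep only terms from the "spell" field that appear in at least 20% of the documents.
        Dictionary dictionary = new HiFrequencyDictionary(reader, "spell", 0.2f);

        // Build the spelling index in RAM, as the handler does by default.
        SpellChecker spellChecker = new SpellChecker(new RAMDirectory());
        spellChecker.clearIndex();
        spellChecker.indexDictionary(dictionary);

        // Ask for up to 5 suggestions for a misspelled word.
        String[] suggestions = spellChecker.suggestSimilar("coat", 5);
        for (String s : suggestions) {
          System.out.println(s);
        }

        reader.close();
      }
    }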
@@ -0,0 +1,473 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler;

import org.apache.solr.util.AbstractSolrTestCase;

/**
 * This is a test case for the SpellCheckerRequestHandler class.
 * It tests:
 * <ul>
 *   <li>The generation of the spell checker index with 10 words</li>
 *   <li>The identification of the word that was being spell checked</li>
 *   <li>The confirmation of whether or not the word exists in the index</li>
 *   <li>The suggestion lists for correctly and incorrectly spelled words</li>
 *   <li>The suggestions for both correct and incorrect words</li>
 *   <li>The limitation on the number of suggestions with the
 *       suggestionCount parameter</li>
 *   <li>The usage of the extendedResults parameter</li>
 * </ul>
 *
 * Notes/Concerns about this Test Case:
 * <ul>
 *   <li>This is my first test case for a Solr Handler. As such I am not
 *       familiar with the AbstractSolrTestCase, so I am not 100% sure
 *       these test cases will behave the same for each person
 *       who runs them (see next note).</li>
 *   <li>The order of the arrays (arr) may not be consistent on other
 *       systems or different runs, so these test cases may fail?</li>
 *   <li>Note: I changed //arr/str[1][.='cart'] to //arr/str[.='cart'] and it
 *       appears to work.</li>
 *   <li>The two notations appear to successfully test for the same thing:
 *       "//lst[@name='result']/lst[1][@name='word']/str[@name='words'][.='cat']"
 *       and "//str[@name='words'][.='cat']", which I would think // would indicate
 *       a root node.</li>
 * </ul>
 */
public class SpellCheckerRequestHandlerTest
  extends AbstractSolrTestCase
{

  @Override
  public String getSchemaFile() { return "solr/conf/schema-spellchecker.xml"; }

  @Override
  public String getSolrConfigFile() { return "solr/conf/solrconfig-spellchecker.xml"; }

  @Override
  public void setUp() throws Exception {
    super.setUp();
  }

  private void buildSpellCheckIndex()
  {
    lrf = h.getRequestFactory("spellchecker", 0, 20);
    lrf.args.put("version", "2.0");
    lrf.args.put("sp.query.accuracy", ".9");

    assertU("Add some words to the Spell Check Index:",
            adoc("id", "100", "spell", "solr"));
    assertU(adoc("id", "101", "spell", "cat"));
    assertU(adoc("id", "102", "spell", "cart"));
    assertU(adoc("id", "103", "spell", "carp"));
    assertU(adoc("id", "104", "spell", "cant"));
    assertU(adoc("id", "105", "spell", "catnip"));
    assertU(adoc("id", "106", "spell", "cattails"));
    assertU(adoc("id", "107", "spell", "cod"));
    assertU(adoc("id", "108", "spell", "corn"));
    assertU(adoc("id", "109", "spell", "cot"));

    assertU(commit());
    assertU(optimize());

    lrf.args.put("cmd", "rebuild");
    assertQ("Need to first build the index:",
            req("cat")
            ,"//str[@name='cmdExecuted'][.='rebuild']"
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            // ,"//arr[@name='suggestions'][.='']"
            );
    lrf.args.clear();
  }

  /**
   * Test spell checking of a single correctly spelled word at various
   * accuracy levels to see how the suggestions vary.
   */
  public void testSpellCheck_01_correctWords() {

    buildSpellCheckIndex();

    lrf = h.getRequestFactory("spellchecker", 0, 20);
    lrf.args.put("version", "2.0");

    lrf.args.put("sp.query.accuracy", ".9");
    assertQ("Failed to spell check",
            req("cat")
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            );

    lrf.args.put("sp.query.accuracy", ".4");
    assertQ("Failed to spell check",
            req("cat")
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            ,"//arr/str[.='cot']"
            ,"//arr/str[.='cart']"
            // ,"//arr/str[1][.='cot']"
            // ,"//arr/str[2][.='cart']"
            );

    lrf.args.put("sp.query.accuracy", ".0");
    assertQ("Failed to spell check",
            req("cat")
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            ,"//arr/str[.='cart']"
            ,"//arr/str[.='cot']"
            ,"//arr/str[.='carp']"
            ,"//arr/str[.='cod']"
            ,"//arr/str[.='corn']"
            );
  }

  /**
   * Test spell checking of a single misspelled word at various accuracy
   * levels to see how the suggestions vary.
   */
  public void testSpellCheck_02_incorrectWords() {

    buildSpellCheckIndex();

    lrf = h.getRequestFactory("spellchecker", 0, 20);
    lrf.args.put("version", "2.0");
    lrf.args.put("sp.query.accuracy", ".9");

    assertQ("Confirm the index is still valid",
            req("cat")
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            );

    assertQ("Failed to spell check",
            req("coat")
            ,"//str[@name='words'][.='coat']"
            ,"//str[@name='exist'][.='false']"
            ,"//arr[@name='suggestions'][.='']"
            );

    lrf.args.put("sp.query.accuracy", ".2");
    assertQ("Failed to spell check",
            req("coat")
            ,"//str[@name='words'][.='coat']"
            ,"//str[@name='exist'][.='false']"
            ,"//arr/str[.='cot']"
            ,"//arr/str[.='cat']"
            ,"//arr/str[.='corn']"
            ,"//arr/str[.='cart']"
            ,"//arr/str[.='cod']"
            ,"//arr/str[.='solr']"
            ,"//arr/str[.='carp']"
            );

    lrf.args.put("sp.query.suggestionCount", "2");
    lrf.args.put("sp.query.accuracy", ".2");
    assertQ("Failed to spell check",
            req("coat")
            ,"//str[@name='words'][.='coat']"
            ,"//str[@name='exist'][.='false']"
            ,"//arr/str[.='cot']"
            ,"//arr/str[.='cat']"
            );
  }

  /**
   * Test extended results for multiple correctly spelled words at various
   * accuracy levels to see how the suggestions vary.
   */
  public void testSpellCheck_03_multiWords_correctWords() {

    buildSpellCheckIndex();

    lrf = h.getRequestFactory("spellchecker", 0, 20);
    lrf.args.put("version", "2.0");
    lrf.args.put("sp.query.accuracy", ".9");

    assertQ("Confirm the index is still valid",
            req("cat")
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            );

    // Enable multi-word (extended results) formatting:
    lrf.args.put("sp.query.extendedResults", "true");

    assertQ("Failed to spell check",
            req("cat")
            ,"//lst[@name='cat']"
            ,"//lst[@name='cat']/int[@name='frequency'][.>0]"
            ,"//lst[@name='cat']/lst[@name='suggestions' and count(lst)=0]"
            );

    // Please note that the following produces this XML structure:
    // <response>
    //   <responseHeader>
    //     <status>0</status><QTime>0</QTime>
    //   </responseHeader>
    //   <lst name="result">
    //     <lst name="cat">
    //       <int name="frequency">1</int>
    //       <lst name="suggestions">
    //         <lst name="cart"><int name="frequency">1</int></lst>
    //         <lst name="cot"><int name="frequency">1</int></lst>
    //         <lst name="cod"><int name="frequency">1</int></lst>
    //         <lst name="carp"><int name="frequency">1</int></lst>
    //       </lst>
    //     </lst>
    //   </lst>
    // </response>

    lrf.args.put("sp.query.accuracy", ".2");
    assertQ("Failed to spell check",
            req("cat")
            ,"//lst[@name='cat']"
            ,"//lst[@name='cat']/int[@name='frequency'][.>0]"
            ,"//lst[@name='cat']/lst[@name='suggestions']/lst[@name='cart']/int[@name='frequency'][.>0]"
            ,"//lst[@name='cat']/lst[@name='suggestions']/lst[@name='cot']/int[@name='frequency'][.>0]"
            ,"//lst[@name='cat']/lst[@name='suggestions']/lst[@name='cod']/int[@name='frequency'][.>0]"
            ,"//lst[@name='cat']/lst[@name='suggestions']/lst[@name='carp']/int[@name='frequency'][.>0]"
            );

    lrf.args.put("sp.query.suggestionCount", "2");
    lrf.args.put("sp.query.accuracy", ".2");
    assertQ("Failed to spell check",
            req("cat")
            ,"//lst[@name='cat']"
            ,"//lst[@name='cat']/int[@name='frequency'][.>0]"
            ,"//lst[@name='cat']/lst[@name='suggestions']/lst[@name='cart']"
            ,"//lst[@name='cat']/lst[@name='suggestions']/lst[@name='cot']"
            );

    /* The following is the generated XML response for the next query with three words:
      <response>
        <responseHeader><status>0</status><QTime>0</QTime></responseHeader>
        <lst name="result">
          <lst name="cat">
            <int name="frequency">1</int>
            <lst name="suggestions">
              <lst name="cart"><int name="frequency">1</int></lst>
              <lst name="cot"><int name="frequency">1</int></lst>
            </lst>
          </lst>
          <lst name="card">
            <int name="frequency">1</int>
            <lst name="suggestions">
              <lst name="carp"><int name="frequency">1</int></lst>
              <lst name="cat"><int name="frequency">1</int></lst>
            </lst>
          </lst>
          <lst name="carp">
            <int name="frequency">1</int>
            <lst name="suggestions">
              <lst name="cart"><int name="frequency">1</int></lst>
              <lst name="corn"><int name="frequency">1</int></lst>
            </lst>
          </lst>
        </lst>
      </response>
    */

    lrf.args.put("sp.query.suggestionCount", "2");
    lrf.args.put("sp.query.accuracy", ".2");
    assertQ("Failed to spell check",
            req("cat cart carp")
            ,"//lst[@name='cat']"
            ,"//lst[@name='cat']/int[@name='frequency'][.>0]"
            ,"//lst[@name='cat']/lst[@name='suggestions']/lst[@name='cart']"
            ,"//lst[@name='cat']/lst[@name='suggestions']/lst[@name='cot']"

            ,"//lst[@name='cart']"
            ,"//lst[@name='cart']/int[@name='frequency'][.>0]"
            ,"//lst[@name='cart']/lst/lst[1]"
            ,"//lst[@name='cart']/lst/lst[2]"

            ,"//lst[@name='carp']"
            ,"//lst[@name='carp']/int[@name='frequency'][.>0]"
            ,"//lst[@name='carp']/lst[@name='suggestions']/lst[@name='cart']"
            ,"//lst[@name='carp']/lst[@name='suggestions']/lst[@name='corn']"
            );
  }

  /**
   * Test extended results for multiple misspelled words at various
   * accuracy levels to see how the suggestions vary.
   */
  public void testSpellCheck_04_multiWords_incorrectWords() {

    buildSpellCheckIndex();

    lrf = h.getRequestFactory("spellchecker", 0, 20);
    lrf.args.put("version", "2.0");
    lrf.args.put("sp.query.accuracy", ".9");

    assertQ("Confirm the index is still valid",
            req("cat")
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            );

    // Enable multi-word (extended results) formatting:
    lrf.args.put("sp.query.extendedResults", "true");

    assertQ("Failed to spell check",
            req("coat")
            ,"//lst[@name='coat']"
            ,"//lst[@name='coat']/int[@name='frequency'][.=0]"
            ,"//lst[@name='coat']/lst[@name='suggestions' and count(lst)=0]"
            );

    lrf.args.put("sp.query.accuracy", ".2");
    assertQ("Failed to spell check",
            req("coat")
            ,"//lst[@name='coat']"
            ,"//lst[@name='coat']/int[@name='frequency'][.=0]"
            ,"//lst[@name='coat']/lst[@name='suggestions']/lst[@name='cot']"
            ,"//lst[@name='coat']/lst[@name='suggestions']/lst[@name='cat']"
            ,"//lst[@name='coat']/lst[@name='suggestions']/lst[@name='corn']"
            ,"//lst[@name='coat']/lst[@name='suggestions']/lst[@name='cart']"
            );

    lrf.args.put("sp.query.suggestionCount", "2");
    lrf.args.put("sp.query.accuracy", ".2");
    assertQ("Failed to spell check",
            req("coat")
            ,"//lst[@name='coat']"
            ,"//lst[@name='coat']/int[@name='frequency'][.=0]"
            ,"//lst[@name='coat']/lst[@name='suggestions']/lst[@name='cot']"
            ,"//lst[@name='coat']/lst[@name='suggestions']/lst[@name='cat']"
            );

    lrf.args.put("sp.query.suggestionCount", "2");
    lrf.args.put("sp.query.accuracy", ".2");
    assertQ("Failed to spell check",
            req("cet cert corp")
            ,"//lst[@name='cet']"
            ,"//lst[@name='cet']/int[@name='frequency'][.=0]"
            ,"//lst[@name='cet']/lst[@name='suggestions']/lst[1]"
            ,"//lst[@name='cet']/lst[@name='suggestions']/lst[2]"

            ,"//lst[@name='cert']"
            ,"//lst[@name='cert']/int[@name='frequency'][.=0]"
            ,"//lst[@name='cert']/lst[@name='suggestions']/lst[1]"
            ,"//lst[@name='cert']/lst[@name='suggestions']/lst[2]"

            ,"//lst[@name='corp']"
            ,"//lst[@name='corp']/int[@name='frequency'][.=0]"
            ,"//lst[@name='corp']/lst[@name='suggestions']/lst[1]"
            ,"//lst[@name='corp']/lst[@name='suggestions']/lst[2]"
            );
  }

  public void testSpellCheck_05_buildDictionary() {
    lrf = h.getRequestFactory("spellchecker", 0, 20);
    lrf.args.put("version", "2.0");
    lrf.args.put("sp.query.accuracy", ".9");

    assertU("Add some words to the Spell Check Index:",
            adoc("id", "100", "spell", "solr cat cart"));
    assertU(adoc("id", "101", "spell", "cat cart"));
    assertU(adoc("id", "102", "spell", "cat cart"));
    assertU(adoc("id", "103", "spell", "cat cart carp"));
    assertU(adoc("id", "104", "spell", "cat car cant"));
    assertU(adoc("id", "105", "spell", "cat catnip"));
    assertU(adoc("id", "106", "spell", "cat cattails"));
    assertU(adoc("id", "107", "spell", "cat cod"));
    assertU(adoc("id", "108", "spell", "cat corn"));
    assertU(adoc("id", "109", "spell", "cat cot"));
    assertU(commit());
    assertU(optimize());

    lrf.args.put("sp.dictionary.threshold", "0.20");
    lrf.args.put("cmd", "rebuild");
    assertQ("Need to first build the index:",
            req("cat")
            ,"//str[@name='cmdExecuted'][.='rebuild']"
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            );

    lrf.args.clear();
    lrf.args.put("version", "2.0");
    lrf.args.put("sp.query.accuracy", ".9");

    assertQ("Confirm index contains only words above threshold",
            req("cat")
            ,"//str[@name='words'][.='cat']"
            ,"//str[@name='exist'][.='true']"
            );

    assertQ("Confirm index contains only words above threshold",
            req("cart")
            ,"//str[@name='words'][.='cart']"
            ,"//str[@name='exist'][.='true']"
            );

    assertQ("Confirm index contains only words above threshold",
            req("cod")
            ,"//str[@name='words'][.='cod']"
            ,"//str[@name='exist'][.='false']"
            );

    assertQ("Confirm index contains only words above threshold",
            req("corn")
            ,"//str[@name='words'][.='corn']"
            ,"//str[@name='exist'][.='false']"
            );

    lrf.args.clear();
  }
}
@@ -0,0 +1,83 @@
<?xml version="1.0" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!-- This is the Solr schema file. This file should be named "schema.xml" and
     should be in the conf directory under the solr home
     (i.e. ./solr/conf/schema.xml by default)
     or located where the classloader for the Solr webapp can find it.

     For more information on how to customize this file, please see
     http://wiki.apache.org/solr/SchemaXml
-->

<schema name="Solr SpellCheck Test" version="1.1">
  <!-- attribute "name" is the name of this schema and is only used for display purposes.
       Applications should change this to reflect the nature of the search collection.
       version="1.1" is Solr's version number for the schema syntax and semantics.  It should
       not normally be changed by applications.
       1.0: multiValued attribute did not exist, all fields are multiValued by nature
       1.1: multiValued attribute introduced, false by default -->

  <types>
    <fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>

    <fieldtype name="text" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StandardFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory"/>
        <filter class="solr.EnglishPorterFilterFactory"/>
      </analyzer>
    </fieldtype>

    <fieldType name="spellText" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.StandardFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.StandardFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

  </types>


  <fields>
    <field name="id" type="string" indexed="true" stored="true"/>
    <field name="spell" type="spellText" indexed="true" stored="true" />
    <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
  </fields>

  <!-- field to use to determine and enforce document uniqueness. -->
  <uniqueKey>id</uniqueKey>

  <!-- field for the QueryParser to use when an explicit fieldname is absent -->
  <defaultSearchField>text</defaultSearchField>

  <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
  <solrQueryParser defaultOperator="OR"/>

</schema>
@@ -0,0 +1,103 @@
<?xml version="1.0" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<config>
  <indexDefaults>
    <useCompoundFile>false</useCompoundFile>
    <mergeFactor>10</mergeFactor>
    <maxBufferedDocs>1000</maxBufferedDocs>
    <maxMergeDocs>2147483647</maxMergeDocs>
    <maxFieldLength>10000</maxFieldLength>
    <writeLockTimeout>1000</writeLockTimeout>
    <commitLockTimeout>10000</commitLockTimeout>
  </indexDefaults>

  <mainIndex>
    <useCompoundFile>false</useCompoundFile>
    <mergeFactor>10</mergeFactor>
    <maxBufferedDocs>1000</maxBufferedDocs>
    <maxMergeDocs>2147483647</maxMergeDocs>
    <maxFieldLength>10000</maxFieldLength>
    <unlockOnStartup>true</unlockOnStartup>
  </mainIndex>


  <updateHandler class="solr.DirectUpdateHandler2">
    <commitIntervalLowerBound>0</commitIntervalLowerBound>
  </updateHandler>


  <query>
    <maxBooleanClauses>1024</maxBooleanClauses>
    <useFilterForSortedQuery>true</useFilterForSortedQuery>
    <queryResultWindowSize>10</queryResultWindowSize>
    <HashDocSet maxSize="3000" loadFactor="0.75"/>
    <boolTofilterOptimizer enabled="true" cacheSize="32" threshold=".05"/>
  </query>


  <requestHandler name="standard" class="solr.StandardRequestHandler" />
  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />


  <!-- SpellCheckerRequestHandler takes in a word (or several words) as the
       value of the "q" parameter and returns a list of alternative spelling
       suggestions.  If invoked with a ...&cmd=rebuild, it will rebuild the
       spellchecker index.
  -->
  <requestHandler name="spellchecker" class="solr.SpellCheckerRequestHandler" startup="lazy">
    <!-- default values for query parameters -->
    <lst name="defaults">
      <int name="sp.query.suggestionCount">20</int>
      <float name="sp.query.accuracy">0.60</float>
    </lst>

    <!-- Main init params for handler -->

    <!-- The directory where your SpellChecker index should live.        -->
    <!-- May be absolute, or relative to the Solr "dataDir" directory.   -->
    <!-- If this option is not specified, a RAM directory will be used.  -->
    <str name="sp.dictionary.spellcheckerIndexDir">spell</str>

    <!-- the field in your schema that you want to be able to build      -->
    <!-- your spell index on. This should be a field that uses a very    -->
    <!-- simple FieldType without a lot of Analysis (ie: string)         -->
    <str name="sp.dictionary.termSourceField">spell</str>

    <!-- threshold for a word to make it into the dictionary:            -->
    <!-- a word must appear in at least the specified percent of documents -->
    <str name="sp.dictionary.threshold">0.0</str>

  </requestHandler>



  <queryResponseWriter name="standard" class="org.apache.solr.request.XMLResponseWriter"/>
  <queryResponseWriter name="useless" class="org.apache.solr.OutputWriterTest$UselessOutputWriter"/>
  <queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter"/>
  <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/>


  <!-- config for the admin interface -->
  <admin>
    <defaultQuery>solr</defaultQuery>
    <gettableFiles>solrconfig.xml schema.xml admin-extra.html</gettableFiles>
  </admin>

</config>
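As a usage note, this test configuration registers the handler under the name "spellchecker", so a rebuild followed by a query would look roughly like the following (the host and port are an assumption for a stock local Solr install, not part of this commit):

    http://localhost:8983/solr/spellchecker?cmd=rebuild&q=cat
    http://localhost:8983/solr/spellchecker?q=coat&sp.query.accuracy=.2&sp.query.suggestionCount=2&sp.query.extendedResults=true

The sp.query.* values override the defaults declared in the <lst name="defaults"> block above, exactly as the test case exercises them.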