LUCENE-2608: Add per-method and request accuracy to spell checker

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@987179 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2010-08-19 15:01:29 +00:00
parent 4ec28930c9
commit 1620366b5c
14 changed files with 446 additions and 146 deletions

View File

@ -23,6 +23,9 @@ New Features
* LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll)
* LUCENE-2608: Added the ability to specify the accuracy per method call in the SpellChecker. The per-instance
setting (SpellChecker#setAccuracy) is also still available. (Grant Ingersoll)
API Changes
* LUCENE-2606: Changed RegexCapabilities interface to fix thread

View File

@ -62,11 +62,16 @@ import org.apache.lucene.util.Version;
*/
public class SpellChecker implements java.io.Closeable {
/**
* The default minimum score to use, if not specified by calling {@link #setAccuracy(float)}.
*/
public static final float DEFAULT_ACCURACY = 0.5f;
/**
* Field name for each word in the ngram index.
*/
public static final String F_WORD = "word";
private static final Term F_WORD_TERM = new Term(F_WORD);
/**
@ -75,35 +80,34 @@ public class SpellChecker implements java.io.Closeable {
// don't modify the directory directly - see #swapSearcher()
// TODO: why is this package private?
Directory spellIndex;
/**
* Boost value for start and end grams
*/
private float bStart = 2.0f;
private float bEnd = 1.0f;
private float bEnd = 1.0f;
// don't use this searcher directly - see #swapSearcher()
private IndexSearcher searcher;
/*
* this locks all modifications to the current searcher.
* this locks all modifications to the current searcher.
*/
private final Object searcherLock = new Object();
/*
* this lock synchronizes all possible modifications to the
* this lock synchronizes all possible modifications to the
* current index directory. It should not be possible to try modifying
* the same index concurrently. Note: Do not acquire the searcher lock
* before acquiring this lock!
* before acquiring this lock!
*/
private final Object modifyCurrentIndexLock = new Object();
private volatile boolean closed = false;
// minimum score for hits generated by the spell checker query
private float minScore = 0.5f;
private StringDistance sd;
private float accuracy = DEFAULT_ACCURACY;
private StringDistance sd;
private Comparator<SuggestWord> comparator;
/**
@ -202,10 +206,20 @@ public class SpellChecker implements java.io.Closeable {
}
/**
* Sets the accuracy 0 &lt; minScore &lt; 1; default 0.5
* Sets the accuracy 0 &lt; acc &lt; 1; default {@link #DEFAULT_ACCURACY}
* @param acc The new accuracy
*/
public void setAccuracy(float minScore) {
this.minScore = minScore;
public void setAccuracy(float acc) {
this.accuracy = acc;
}
/**
 * Returns the minimum score a suggestion must reach in order to be included in the results.
 * An accuracy passed explicitly to
 * {@link #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)}
 * is used instead of this value for that call.
 * @return The current accuracy setting
 */
public float getAccuracy() {
  return this.accuracy;
}
/**
@ -224,11 +238,37 @@ public class SpellChecker implements java.io.Closeable {
* @throws IOException if the underlying index throws an {@link IOException}
* @throws AlreadyClosedException if the Spellchecker is already closed
* @return String[]
*
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
*/
public String[] suggestSimilar(String word, int numSug) throws IOException {
  // Unrestricted lookup: no user index, no field, and no popularity filtering.
  final String[] suggestions = suggestSimilar(word, numSug, null, null, false);
  return suggestions;
}
/**
* Suggest similar words.
*
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
* is not the same as the edit distance strategy used to calculate the best
* matching spell-checked word from the hits that Lucene found, one usually has
* to retrieve a couple of numSug's in order to get the true best match.
*
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
*
* @param word the word you want a spell check done on
* @param numSug the number of suggested words
* @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
* @throws IOException if the underlying index throws an {@link IOException}
* @throws AlreadyClosedException if the Spellchecker is already closed
* @return String[]
*
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
*/
public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException {
  // Same unrestricted lookup as the two-argument overload, but with the caller-supplied accuracy.
  final String[] suggestions = suggestSimilar(word, numSug, null, null, false, accuracy);
  return suggestions;
}
/**
* Suggest similar words (optionally restricted to a field of an index).
*
@ -240,6 +280,40 @@ public class SpellChecker implements java.io.Closeable {
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
*
* <p>Uses the current {@link #getAccuracy()} value (see {@link #setAccuracy(float)}) as the accuracy.
*
* @param word the word you want a spell check done on
* @param numSug the number of suggested words
* @param ir the indexReader of the user index (can be null see field param)
* @param field the field of the user index: if field is not null, the suggested
* words are restricted to the words present in this field.
* @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
* (only if restricted mode = (indexReader!=null and field!=null)
* @throws IOException if the underlying index throws an {@link IOException}
* @throws AlreadyClosedException if the Spellchecker is already closed
* @return String[] the sorted list of the suggest words with these 2 criteria:
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
* of the suggest words in the field of the user index
*
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
*/
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
    String field, boolean morePopular) throws IOException {
  // Delegate to the full overload, supplying the accuracy currently configured on this instance.
  final float configuredAccuracy = this.accuracy;
  return this.suggestSimilar(word, numSug, ir, field, morePopular, configuredAccuracy);
}
/**
* Suggest similar words (optionally restricted to a field of an index).
*
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
* is not the same as the edit distance strategy used to calculate the best
* matching spell-checked word from the hits that Lucene found, one usually has
* to retrieve a couple of numSug's in order to get the true best match.
*
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
*
* @param word the word you want a spell check done on
* @param numSug the number of suggested words
* @param ir the indexReader of the user index (can be null see field param)
@ -247,6 +321,7 @@ public class SpellChecker implements java.io.Closeable {
* words are restricted to the words present in this field.
* @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
* (only if restricted mode = (indexReader!=null and field!=null)
* @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
* @throws IOException if the underlying index throws an {@link IOException}
* @throws AlreadyClosedException if the Spellchecker is already closed
* @return String[] the sorted list of the suggest words with these 2 criteria:
@ -254,72 +329,72 @@ public class SpellChecker implements java.io.Closeable {
* of the suggest words in the field of the user index
*/
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
String field, boolean morePopular) throws IOException {
String field, boolean morePopular, float accuracy) throws IOException {
// obtainSearcher calls ensureOpen
final IndexSearcher indexSearcher = obtainSearcher();
try{
float min = this.minScore;
final int lengthWord = word.length();
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
// if the word exists in the real index and we don't care for word frequency, return the word itself
if (!morePopular && freq > 0) {
return new String[] { word };
}
BooleanQuery query = new BooleanQuery();
String[] grams;
String key;
for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
key = "gram" + ng; // form key
grams = formGrams(word, ng); // form word into ngrams (allow dups too)
if (grams.length == 0) {
continue; // hmm
}
if (bStart > 0) { // should we boost prefixes?
add(query, "start" + ng, grams[0], bStart); // matches start of word
}
if (bEnd > 0) { // should we boost suffixes
add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
}
for (int i = 0; i < grams.length; i++) {
add(query, key, grams[i]);
}
}
int maxHits = 10 * numSug;
// System.out.println("Q: " + query);
ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
// System.out.println("HITS: " + hits.length());
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
// go thru more than 'maxr' matches in case the distance filter triggers
int stop = Math.min(hits.length, maxHits);
SuggestWord sugWord = new SuggestWord();
for (int i = 0; i < stop; i++) {
sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
// don't suggest a word for itself, that would be silly
if (sugWord.string.equals(word)) {
continue;
}
// edit distance
sugWord.score = sd.getDistance(word,sugWord.string);
if (sugWord.score < min) {
if (sugWord.score < accuracy) {
continue;
}
if (ir != null && field != null) { // use the user index
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
// don't suggest a word that is not present in the field
@ -330,23 +405,22 @@ public class SpellChecker implements java.io.Closeable {
sugQueue.insertWithOverflow(sugWord);
if (sugQueue.size() == numSug) {
// if queue full, maintain the minScore score
min = sugQueue.top().score;
accuracy = sugQueue.top().score;
}
sugWord = new SuggestWord();
}
// convert to array string
String[] list = new String[sugQueue.size()];
for (int i = sugQueue.size() - 1; i >= 0; i--) {
list[i] = sugQueue.pop().string;
}
return list;
} finally {
releaseSearcher(indexSearcher);
}
}
/**
* Add a clause to a boolean query.
*/

View File

@ -104,11 +104,21 @@ public class TestSpellChecker extends LuceneTestCase {
spellChecker.setAccuracy(0.8f);
checkCommonSuggestions(r);
checkJaroWinklerSuggestions();
// the instance accuracy was set to 0.8 above, but the best result has a score of 0.925, so pass the accuracy per call
String[] similar = spellChecker.suggestSimilar("fvie", 2, 0.93f);
assertTrue(similar.length == 0);
similar = spellChecker.suggestSimilar("fvie", 2, 0.92f);
assertTrue(similar.length == 1);
similar = spellChecker.suggestSimilar("fiv", 2);
assertTrue(similar.length > 0);
assertEquals(similar[0], "five");
spellChecker.setStringDistance(new NGramDistance(2));
spellChecker.setAccuracy(0.5f);
checkCommonSuggestions(r);
checkNGramSuggestions();
r.close();
}
@ -127,8 +137,6 @@ public class TestSpellChecker extends LuceneTestCase {
if (!compareSP.isClosed())
compareSP.close();
compIdx.close();
}
private void checkCommonSuggestions(IndexReader r) throws IOException {

View File

@ -77,6 +77,12 @@ Upgrading from Solr 1.4
legacy behavior should set a default value for the 'mm' param in
their solrconfig.xml file.
* LUCENE-2608: Added the ability to specify the accuracy on a per request basis.
Implementations of SolrSpellChecker must change over to the new SolrSpellChecker
abstract methods using the new SpellingOptions class. While this change is not
backward compatible, it should be trivial to migrate as the SpellingOptions class
just encapsulates the parameters that were passed in to the methods before the change. (gsingers)
Detailed Change List
----------------------
@ -226,11 +232,6 @@ New Features
* SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers)
* SOLR-2049: Add hl.multiValuedSeparatorChar for FastVectorHighlighter, per LUCENE-2603. (koji)
* SOLR-1881: add a url-scheme config string to SearchHandler to specify alternate
URL prefixes for distributed search shard requests. (Sami Siren via yonik)
Optimizations
----------------------

View File

@ -81,4 +81,9 @@ public interface SpellingParams {
* Take the top suggestion for each token and create a new query from it
*/
public static final String SPELLCHECK_COLLATE = SPELLCHECK_PREFIX + "collate";
/**
* Certain spelling implementations may allow for an accuracy setting.
*/
public static final String SPELLCHECK_ACCURACY = SPELLCHECK_PREFIX + "accuracy";
}

View File

@ -23,11 +23,13 @@ import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -144,8 +146,12 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
NamedList response = new SimpleOrderedMap();
IndexReader reader = rb.req.getSearcher().getReader();
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
SpellingResult spellingResult = spellChecker.getSuggestions(tokens,
reader, count, onlyMorePopular, extendedResults);
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
SolrParams customParams = getCustomParams(getDictionaryName(params), params);
SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
accuracy, customParams);
SpellingResult spellingResult = spellChecker.getSuggestions(options);
if (spellingResult != null) {
response.add("suggestions", toNamedList(spellingResult, q,
extendedResults, collate));
@ -159,6 +165,24 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
}
}
/**
 * Collects the request parameters that are addressed to a single dictionary. Every parameter whose
 * name starts with {@code SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + "."} is added to the
 * result under the remainder of its name (XXXX for a param XXXX=YYYY), with the values returned by
 * {@code params.getParams(name)}.
 * <p>
 * NOTE(review): if SPELLCHECK_PREFIX already ends with a '.', matching parameter names contain two
 * consecutive dots after the prefix -- confirm that this is intended.
 *
 * @param dictionary The name of the dictionary whose custom params are wanted
 * @param params The original SolrParams
 * @return The new Params; nothing is added to it when no parameter name matches
 */
protected SolrParams getCustomParams(String dictionary, SolrParams params) {
  final String dictPrefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + ".";
  final ModifiableSolrParams custom = new ModifiableSolrParams();
  for (Iterator<String> names = params.getParameterNamesIterator(); names.hasNext();) {
    final String name = names.next();
    if (!name.startsWith(dictPrefix)) {
      continue; // not addressed to this dictionary
    }
    // strip the prefix so the spell checker only sees its own param names
    custom.add(name.substring(dictPrefix.length()), params.getParams(name));
  }
  return custom;
}
@Override
@ -341,13 +365,17 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
}
protected SolrSpellChecker getSpellChecker(SolrParams params) {
return spellCheckers.get(getDictionaryName(params));
}
private String getDictionaryName(SolrParams params) {
String dictName = params.get(SPELLCHECK_DICT);
if (dictName == null) {
dictName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
}
return spellCheckers.get(dictName);
return dictName;
}
/**
* @return the spellchecker registered to a given name
*/

View File

@ -150,29 +150,30 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
return name;
}
@SuppressWarnings("unchecked")
public SpellingResult getSuggestions(Collection<Token> tokens,
IndexReader reader, int count, boolean onlyMorePopular,
boolean extendedResults)
throws IOException {
SpellingResult result = new SpellingResult(tokens);
reader = determineReader(reader);
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
SpellingResult result = new SpellingResult(options.tokens);
IndexReader reader = determineReader(options.reader);
Term term = field != null ? new Term(field, "") : null;
for (Token token : tokens) {
float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
int count = (int) Math.max(options.count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
for (Token token : options.tokens) {
String tokenText = new String(token.buffer(), 0, token.length());
String[] suggestions = spellChecker.suggestSimilar(tokenText, (int) Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT),
String[] suggestions = spellChecker.suggestSimilar(tokenText,
count,
field != null ? reader : null, //workaround LUCENE-1295
field,
onlyMorePopular);
options.onlyMorePopular, theAccuracy);
if (suggestions.length == 1 && suggestions[0].equals(tokenText)) {
//These are spelled the same, continue on
continue;
}
if (extendedResults == true && reader != null && field != null) {
if (options.extendedResults == true && reader != null && field != null) {
term = term.createTerm(tokenText);
result.add(token, reader.docFreq(term));
int countLimit = Math.min(count, suggestions.length);
int countLimit = Math.min(options.count, suggestions.length);
for (int i = 0; i < countLimit; i++) {
term = term.createTerm(suggestions[i]);
result.add(token, suggestions[i], reader.docFreq(term));
@ -180,8 +181,8 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
} else {
if (suggestions.length > 0) {
List<String> suggList = Arrays.asList(suggestions);
if (suggestions.length > count) {
suggList = suggList.subList(0, count);
if (suggestions.length > options.count) {
suggList = suggList.subList(0, options.count);
}
result.add(token, suggList);
}

View File

@ -70,46 +70,15 @@ public abstract class SolrSpellChecker {
*/
public abstract void build(SolrCore core, SolrIndexSearcher searcher);
/**
* Assumes count = 1, onlyMorePopular = false, extendedResults = false
*
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
*/
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader) throws IOException {
return getSuggestions(tokens, reader, 1, false, false);
}
/**
* Assumes onlyMorePopular = false, extendedResults = false
*
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
*/
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count) throws IOException {
return getSuggestions(tokens, reader, count, false, false);
}
/**
* Assumes count = 1.
*
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
*/
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, boolean onlyMorePopular, boolean extendedResults) throws IOException {
return getSuggestions(tokens, reader, 1, onlyMorePopular, extendedResults);
}
/**
* Get suggestions for the given query. Tokenizes the query using a field appropriate Analyzer.
* The {@link SpellingResult#getSuggestions()} suggestions must be ordered by best suggestion first.
* <p/>
*
* @param tokens The Tokens to be spell checked.
* @param reader The (optional) IndexReader. If there is not IndexReader, than extendedResults are not possible
* @param count The maximum number of suggestions to return
* @param onlyMorePopular TODO
* @param extendedResults TODO
* @throws IOException
* @param options The {@link SpellingOptions} to use
* @return The {@link SpellingResult} suggestions
* @throws IOException if there is an error producing suggestions
*/
public abstract SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count,
boolean onlyMorePopular, boolean extendedResults)
throws IOException;
public abstract SpellingResult getSuggestions(SpellingOptions options) throws IOException;
}

View File

@ -0,0 +1,94 @@
package org.apache.solr.spelling;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.common.params.SolrParams;
import java.util.Collection;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Bundles the arguments of a spell check request so they can be handed to
 * SolrSpellChecker#getSuggestions(SpellingOptions) as a single object.
 * All fields are public and mutable; the constructors are conveniences for the common combinations.
 **/
public class SpellingOptions {

  /**
   * The tokens to spell check
   */
  public Collection<Token> tokens;

  /**
   * An optional {@link org.apache.lucene.index.IndexReader}
   */
  public IndexReader reader;

  /**
   * The number of suggestions to return, if there are any. Defaults to 1.
   */
  public int count = 1;

  /**
   * Return only those results that are more popular, as defined by the implementation
   */
  public boolean onlyMorePopular;

  /**
   * Provide additional, per implementation, information about the results
   */
  public boolean extendedResults;

  /**
   * Optionally restrict the results to have a minimum accuracy level. Per Implementation.
   * By default set to Float.MIN_VALUE.
   */
  public float accuracy = Float.MIN_VALUE;

  /**
   * Any other custom params can be passed through. May be null and is null by default.
   */
  public SolrParams customParams;

  /**
   * Creates options with every field at its default value.
   */
  public SpellingOptions() {
  }

  // Convenience constructors: fields not mentioned keep their defaults.

  /**
   * @param tokens The tokens to spell check
   * @param count The number of suggestions to return
   */
  public SpellingOptions(Collection<Token> tokens, int count) {
    this(tokens, null, count);
  }

  /**
   * @param tokens The tokens to spell check
   * @param reader The optional IndexReader
   */
  public SpellingOptions(Collection<Token> tokens, IndexReader reader) {
    this.reader = reader;
    this.tokens = tokens;
  }

  /**
   * @param tokens The tokens to spell check
   * @param reader The optional IndexReader
   * @param count The number of suggestions to return
   */
  public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count) {
    this.count = count;
    this.reader = reader;
    this.tokens = tokens;
  }

  /**
   * Sets every field explicitly.
   *
   * @param tokens The tokens to spell check
   * @param reader The optional IndexReader
   * @param count The number of suggestions to return
   * @param onlyMorePopular Return only results that are more popular, as defined by the implementation
   * @param extendedResults Provide additional, per implementation, information about the results
   * @param accuracy The minimum accuracy level, per implementation
   * @param customParams Any other custom params; may be null
   */
  public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults, float accuracy, SolrParams customParams) {
    this.tokens = tokens;
    this.reader = reader;
    this.count = count;
    this.onlyMorePopular = onlyMorePopular;
    this.extendedResults = extendedResults;
    this.accuracy = accuracy;
    this.customParams = customParams;
  }
}

View File

@ -0,0 +1,61 @@
package org.apache.solr.handler.component;
import org.apache.lucene.analysis.Token;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.SolrSpellChecker;
import org.apache.solr.spelling.SpellingOptions;
import org.apache.solr.spelling.SpellingResult;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * A Dummy SpellChecker for testing purposes. It ignores the tokens to check and instead echoes
 * the custom params it was given: each param name becomes a token, and the value returned by
 * {@code customParams.get(name)} becomes that token's single suggestion.
 *
 **/
public class DummyCustomParamSpellChecker extends SolrSpellChecker {

  /** Nothing to reload for this dummy. */
  @Override
  public void reload() throws IOException {
  }

  /** Nothing to build for this dummy. */
  @Override
  public void build(SolrCore core, SolrIndexSearcher searcher) {
  }

  /**
   * Spits the custom params back out as suggestions.
   *
   * @param options The {@link SpellingOptions}; only {@code customParams} is read
   * @return one entry per custom param name; no entries are added when {@code customParams} is null
   * @throws IOException declared by the overridden method; not thrown by the code in this body
   */
  @Override
  public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    SpellingResult result = new SpellingResult();
    // customParams is documented as optional on SpellingOptions, so tolerate its absence
    if (options.customParams == null) {
      return result;
    }
    //just spit back out the results
    Iterator<String> iterator = options.customParams.getParameterNamesIterator();
    int offset = 0;
    while (iterator.hasNext()) {
      String name = iterator.next();
      String value = options.customParams.get(name);
      // each token gets start = offset and end = offset + 1; the next token starts two further on
      result.add(new Token(name, offset, offset + 1), Collections.singletonList(value));
      offset += 2;
    }
    return result;
  }
}

View File

@ -24,6 +24,7 @@ import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
@ -33,7 +34,6 @@ import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
import org.apache.solr.spelling.IndexBasedSpellChecker;
import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.BeforeClass;
import org.junit.Test;
@ -133,9 +133,9 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
assertTrue(cmdExec + " is not equal to " + "build",
cmdExec.equals("build") == true);
NamedList spellCheck = (NamedList) values.get("spellcheck");
assertTrue("spellCheck is null and it shouldn't be", spellCheck != null);
assertNotNull(spellCheck);
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
assertTrue("suggestions is null and it shouldn't be", suggestions != null);
assertNotNull(suggestions);
NamedList document = (NamedList) suggestions.get("documemt");
assertEquals(1, document.get("numFound"));
assertEquals(0, document.get("startOffset"));
@ -145,6 +145,50 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
assertEquals("document", theSuggestion.iterator().next());
}
/**
 * Sends two dictionary-specific params ("foo"="bar" and "bar"="foo") for the "perDict" dictionary
 * and checks that they come back as suggestions, i.e. that they reached the spell checker.
 * NOTE(review): presumably "perDict" is configured to use DummyCustomParamSpellChecker, which echoes
 * custom params as tokens with offsets 0/1, 2/3, ... -- confirm against the test solrconfig.
 */
@Test
public void testPerDictionary() throws Exception {
  SolrCore core = h.getCore();
  SearchComponent speller = core.getSearchComponent("spellcheck");
  assertNotNull("speller is null and it shouldn't be", speller);

  ModifiableSolrParams params = new ModifiableSolrParams();
  params.add(CommonParams.QT, "spellCheckCompRH");
  params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
  params.add(CommonParams.Q, "documemt");
  params.add(SpellCheckComponent.COMPONENT_NAME, "true");
  params.add(SpellingParams.SPELLCHECK_DICT, "perDict");
  // these names must match the prefix built by SpellCheckComponent#getCustomParams:
  // SPELLCHECK_PREFIX + "." + dictionary + "."
  params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.foo", "bar");
  params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.bar", "foo");

  SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
  SolrQueryResponse rsp = new SolrQueryResponse();
  handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
  NamedList values = rsp.getValues();
  NamedList spellCheck = (NamedList) values.get("spellcheck");
  assertNotNull("spellCheck", spellCheck);
  NamedList suggestions = (NamedList) spellCheck.get("suggestions");
  assertNotNull("suggestions", suggestions);

  NamedList suggestion;
  Collection<String> theSuggestion;

  // first custom param: foo=bar
  suggestion = (NamedList) suggestions.get("foo");
  assertEquals(1, suggestion.get("numFound"));
  assertEquals(0, suggestion.get("startOffset"));
  assertEquals(1, suggestion.get("endOffset"));
  theSuggestion = (Collection<String>) suggestion.get("suggestion");
  assertEquals(1, theSuggestion.size());
  assertEquals("bar", theSuggestion.iterator().next());

  // second custom param: bar=foo
  suggestion = (NamedList) suggestions.get("bar");
  assertEquals(1, suggestion.get("numFound"));
  assertEquals(2, suggestion.get("startOffset"));
  assertEquals(3, suggestion.get("endOffset"));
  theSuggestion = (Collection<String>) suggestion.get("suggestion");
  assertEquals(1, theSuggestion.size());
  assertEquals("foo", theSuggestion.iterator().next());
}
@Test
public void testCollate() throws Exception {
SolrCore core = h.getCore();

View File

@ -80,15 +80,16 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
IndexReader reader = core.getSearcher().get().getReader();
Collection<Token> tokens = queryConverter.convert("fob");
SpellingResult result = checker.getSuggestions(tokens, reader);
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
assertTrue(entry.getKey() + " is not equal to " + "foo", entry.getKey().equals("foo") == true);
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
tokens = queryConverter.convert("super");
result = checker.getSuggestions(tokens, reader);
spellOpts.tokens = queryConverter.convert("super");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
@ -118,7 +119,9 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
IndexReader reader = core.getSearcher().get().getReader();
Collection<Token> tokens = queryConverter.convert("Solar");
SpellingResult result = checker.getSuggestions(tokens, reader);
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
@ -128,8 +131,8 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
//test something not in the spell checker
tokens = queryConverter.convert("super");
result = checker.getSuggestions(tokens, reader);
spellOpts.tokens = queryConverter.convert("super");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
@ -160,7 +163,8 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
IndexReader reader = core.getSearcher().get().getReader();
Collection<Token> tokens = queryConverter.convert("solar");
SpellingResult result = checker.getSuggestions(tokens, reader);
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
@ -170,10 +174,10 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
tokens = queryConverter.convert("super");
result = checker.getSuggestions(tokens, reader);
spellOpts.tokens = queryConverter.convert("super");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
}
}

View File

@ -125,10 +125,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
IndexReader reader = searcher.getReader();
Collection<Token> tokens = queryConverter.convert("documemt");
SpellingResult result = checker.getSuggestions(tokens, reader);
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("documemt is null and it shouldn't be", suggestions != null);
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
@ -136,32 +137,33 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
//test something not in the spell checker
tokens = queryConverter.convert("super");
result = checker.getSuggestions(tokens, reader);
spellOpts.tokens = queryConverter.convert("super");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
//test something that is spelled correctly
tokens = queryConverter.convert("document");
result = checker.getSuggestions(tokens, reader);
spellOpts.tokens = queryConverter.convert("document");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
suggestions = result.get(spellOpts.tokens.iterator().next());
// A correctly spelled word is expected to yield no suggestions; the failure message must say so.
assertTrue("suggestions is not null and it should be", suggestions == null);
//Has multiple possibilities, but the exact exists, so that should be returned
tokens = queryConverter.convert("red");
result = checker.getSuggestions(tokens, reader, 2);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
spellOpts.tokens = queryConverter.convert("red");
spellOpts.count = 2;
result = checker.getSuggestions(spellOpts);
assertNotNull(result);
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
//Try out something which should have multiple suggestions
tokens = queryConverter.convert("bug");
result = checker.getSuggestions(tokens, reader, 2);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
assertTrue("suggestions is null and it shouldn't be", suggestions != null);
spellOpts.tokens = queryConverter.convert("bug");
result = checker.getSuggestions(spellOpts);
assertNotNull(result);
suggestions = result.get(spellOpts.tokens.iterator().next());
assertNotNull(suggestions);
assertTrue("suggestions Size: " + suggestions.size() + " is not: " + 2, suggestions.size() == 2);
entry = suggestions.entrySet().iterator().next();
@ -198,10 +200,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
IndexReader reader = searcher.getReader();
Collection<Token> tokens = queryConverter.convert("documemt");
SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true);
SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null);
SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("documemt is null and it shouldn't be", suggestions != null);
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
@ -209,16 +212,16 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2);
//test something not in the spell checker
tokens = queryConverter.convert("super");
result = checker.getSuggestions(tokens, reader, 1, false, true);
spellOpts.tokens = queryConverter.convert("super");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
tokens = queryConverter.convert("document");
result = checker.getSuggestions(tokens, reader, 1, false, true);
spellOpts.tokens = queryConverter.convert("document");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
} finally {
holder.decref();
@ -304,10 +307,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
IndexReader reader = searcher.getReader();
Collection<Token> tokens = queryConverter.convert("flesh");
SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true);
SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null);
SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("flesh is null and it shouldn't be", suggestions != null);
assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
@ -315,16 +319,16 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
//test something not in the spell checker
tokens = queryConverter.convert("super");
result = checker.getSuggestions(tokens, reader, 1, false, true);
spellOpts.tokens = queryConverter.convert("super");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
tokens = queryConverter.convert("Caroline");
result = checker.getSuggestions(tokens, reader, 1, false, true);
spellOpts.tokens = queryConverter.convert("Caroline");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
} finally {
holder.decref();

View File

@ -377,7 +377,11 @@
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
<str name="buildOnCommit">true</str>
</lst>
<!-- Spell checker registered under the name "perDict": implemented by
     DummyCustomParamSpellChecker and configured with the "lowerfilt" field. -->
<lst name="spellchecker">
<str name="name">perDict</str>
<str name="classname">org.apache.solr.handler.component.DummyCustomParamSpellChecker</str>
<str name="field">lowerfilt</str>
</lst>
</searchComponent>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>