mirror of https://github.com/apache/lucene.git
LUCENE-2608: Add per-method and request accuracy to spell checker
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@987179 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4ec28930c9
commit
1620366b5c
|
@ -23,6 +23,9 @@ New Features
|
|||
* LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
|
||||
with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll)
|
||||
|
||||
* LUCENE-2608: Added the ability to specify the accuracy per method call in the SpellChecker. The per class
|
||||
method is also still available. (Grant Ingersoll)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-2606: Changed RegexCapabilities interface to fix thread
|
||||
|
|
|
@ -62,6 +62,11 @@ import org.apache.lucene.util.Version;
|
|||
*/
|
||||
public class SpellChecker implements java.io.Closeable {
|
||||
|
||||
/**
|
||||
* The default minimum score to use, if not specified by calling {@link #setAccuracy(float)}.
|
||||
*/
|
||||
public static final float DEFAULT_ACCURACY = 0.5f;
|
||||
|
||||
/**
|
||||
* Field name for each word in the ngram index.
|
||||
*/
|
||||
|
@ -75,21 +80,20 @@ public class SpellChecker implements java.io.Closeable {
|
|||
// don't modify the directory directly - see #swapSearcher()
|
||||
// TODO: why is this package private?
|
||||
Directory spellIndex;
|
||||
|
||||
/**
|
||||
* Boost value for start and end grams
|
||||
*/
|
||||
private float bStart = 2.0f;
|
||||
|
||||
private float bEnd = 1.0f;
|
||||
|
||||
// don't use this searcher directly - see #swapSearcher()
|
||||
private IndexSearcher searcher;
|
||||
|
||||
private IndexSearcher searcher;
|
||||
/*
|
||||
* this locks all modifications to the current searcher.
|
||||
*/
|
||||
private final Object searcherLock = new Object();
|
||||
|
||||
private final Object searcherLock = new Object();
|
||||
/*
|
||||
* this lock synchronizes all possible modifications to the
|
||||
* current index directory. It should not be possible to try modifying
|
||||
|
@ -97,13 +101,13 @@ public class SpellChecker implements java.io.Closeable {
|
|||
* before acquiring this lock!
|
||||
*/
|
||||
private final Object modifyCurrentIndexLock = new Object();
|
||||
private volatile boolean closed = false;
|
||||
|
||||
private volatile boolean closed = false;
|
||||
// minimum score for hits generated by the spell checker query
|
||||
private float minScore = 0.5f;
|
||||
|
||||
private float accuracy = DEFAULT_ACCURACY;
|
||||
|
||||
private StringDistance sd;
|
||||
|
||||
private Comparator<SuggestWord> comparator;
|
||||
|
||||
/**
|
||||
|
@ -202,10 +206,20 @@ public class SpellChecker implements java.io.Closeable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Sets the accuracy 0 < minScore < 1; default 0.5
|
||||
* Sets the accuracy (minimum score); 0 &lt; acc &lt; 1, default {@link #DEFAULT_ACCURACY}
|
||||
* @param acc The new accuracy
|
||||
*/
|
||||
public void setAccuracy(float minScore) {
|
||||
this.minScore = minScore;
|
||||
public void setAccuracy(float acc) {
|
||||
this.accuracy = acc;
|
||||
}
|
||||
|
||||
/**
|
||||
* The accuracy (minimum score) to be used, unless overridden in {@link #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)}, to
|
||||
* decide whether a suggestion is included or not.
|
||||
* @return The current accuracy setting
|
||||
*/
|
||||
public float getAccuracy() {
|
||||
return accuracy;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -224,11 +238,71 @@ public class SpellChecker implements java.io.Closeable {
|
|||
* @throws IOException if the underlying index throws an {@link IOException}
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @return String[]
|
||||
*
|
||||
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug) throws IOException {
|
||||
return this.suggestSimilar(word, numSug, null, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest similar words.
|
||||
*
|
||||
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
|
||||
* is not the same as the edit distance strategy used to calculate the best
|
||||
* matching spell-checked word from the hits that Lucene found, one usually has
|
||||
* to retrieve a couple of numSug's in order to get the true best match.
|
||||
*
|
||||
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
|
||||
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
|
||||
*
|
||||
* @param word the word you want a spell check done on
|
||||
* @param numSug the number of suggested words
|
||||
* @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
|
||||
* @throws IOException if the underlying index throws an {@link IOException}
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @return String[]
|
||||
*
|
||||
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException {
|
||||
return this.suggestSimilar(word, numSug, null, null, false, accuracy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest similar words (optionally restricted to a field of an index).
|
||||
*
|
||||
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
|
||||
* is not the same as the edit distance strategy used to calculate the best
|
||||
* matching spell-checked word from the hits that Lucene found, one usually has
|
||||
* to retrieve a couple of numSug's in order to get the true best match.
|
||||
*
|
||||
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
|
||||
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
|
||||
*
|
||||
* <p>Uses the current {@link #getAccuracy()} value (see {@link #setAccuracy(float)}) as the accuracy.
|
||||
*
|
||||
* @param word the word you want a spell check done on
|
||||
* @param numSug the number of suggested words
|
||||
* @param ir the indexReader of the user index (can be null see field param)
|
||||
* @param field the field of the user index: if field is not null, the suggested
|
||||
* words are restricted to the words present in this field.
|
||||
* @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
|
||||
* (only if restricted mode = (indexReader!=null and field!=null)
|
||||
* @throws IOException if the underlying index throws an {@link IOException}
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @return String[] the sorted list of the suggest words with these 2 criteria:
|
||||
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
|
||||
* of the suggest words in the field of the user index
|
||||
*
|
||||
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||
String field, boolean morePopular) throws IOException {
|
||||
return suggestSimilar(word, numSug, ir, field, morePopular, accuracy);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Suggest similar words (optionally restricted to a field of an index).
|
||||
*
|
||||
|
@ -247,6 +321,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
* words are restricted to the words present in this field.
|
||||
* @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
|
||||
* (only if restricted mode = (indexReader!=null and field!=null)
|
||||
* @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
|
||||
* @throws IOException if the underlying index throws an {@link IOException}
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @return String[] the sorted list of the suggest words with these 2 criteria:
|
||||
|
@ -254,11 +329,11 @@ public class SpellChecker implements java.io.Closeable {
|
|||
* of the suggest words in the field of the user index
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||
String field, boolean morePopular) throws IOException {
|
||||
String field, boolean morePopular, float accuracy) throws IOException {
|
||||
// obtainSearcher calls ensureOpen
|
||||
final IndexSearcher indexSearcher = obtainSearcher();
|
||||
try{
|
||||
float min = this.minScore;
|
||||
|
||||
final int lengthWord = word.length();
|
||||
|
||||
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
|
||||
|
@ -316,7 +391,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
|
||||
// edit distance
|
||||
sugWord.score = sd.getDistance(word,sugWord.string);
|
||||
if (sugWord.score < min) {
|
||||
if (sugWord.score < accuracy) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -330,7 +405,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
sugQueue.insertWithOverflow(sugWord);
|
||||
if (sugQueue.size() == numSug) {
|
||||
// if queue full, maintain the minimum score: the queue's top score becomes the new threshold
|
||||
min = sugQueue.top().score;
|
||||
accuracy = sugQueue.top().score;
|
||||
}
|
||||
sugWord = new SuggestWord();
|
||||
}
|
||||
|
@ -346,7 +421,6 @@ public class SpellChecker implements java.io.Closeable {
|
|||
releaseSearcher(indexSearcher);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a clause to a boolean query.
|
||||
*/
|
||||
|
|
|
@ -104,11 +104,21 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
spellChecker.setAccuracy(0.8f);
|
||||
checkCommonSuggestions(r);
|
||||
checkJaroWinklerSuggestions();
|
||||
// the accuracy was set to 0.8 above via setAccuracy, but the best result has a score of 0.925
|
||||
String[] similar = spellChecker.suggestSimilar("fvie", 2, 0.93f);
|
||||
assertTrue(similar.length == 0);
|
||||
similar = spellChecker.suggestSimilar("fvie", 2, 0.92f);
|
||||
assertTrue(similar.length == 1);
|
||||
|
||||
similar = spellChecker.suggestSimilar("fiv", 2);
|
||||
assertTrue(similar.length > 0);
|
||||
assertEquals(similar[0], "five");
|
||||
|
||||
spellChecker.setStringDistance(new NGramDistance(2));
|
||||
spellChecker.setAccuracy(0.5f);
|
||||
checkCommonSuggestions(r);
|
||||
checkNGramSuggestions();
|
||||
|
||||
r.close();
|
||||
}
|
||||
|
||||
|
@ -127,8 +137,6 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
if (!compareSP.isClosed())
|
||||
compareSP.close();
|
||||
compIdx.close();
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void checkCommonSuggestions(IndexReader r) throws IOException {
|
||||
|
|
|
@ -77,6 +77,12 @@ Upgrading from Solr 1.4
|
|||
legacy behavior should set a default value for the 'mm' param in
|
||||
their solrconfig.xml file.
|
||||
|
||||
* LUCENE-2608: Added the ability to specify the accuracy on a per request basis.
|
||||
Implementations of SolrSpellChecker must change over to the new SolrSpellChecker
|
||||
abstract methods using the new SpellingOptions class. While this change is not
|
||||
backward compatible, it should be trivial to migrate as the SpellingOptions class
|
||||
just encapsulates the parameters that were passed in to the methods before the change. (gsingers)
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
|
@ -226,11 +232,6 @@ New Features
|
|||
|
||||
* SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers)
|
||||
|
||||
* SOLR-2049: Add hl.multiValuedSeparatorChar for FastVectorHighlighter, per LUCENE-2603. (koji)
|
||||
|
||||
* SOLR-1881: add a url-scheme config string to SearchHandler to specify alternate
|
||||
URL prefixes for distributed search shard requests. (Sami Siren via yonik)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -81,4 +81,9 @@ public interface SpellingParams {
|
|||
* Take the top suggestion for each token and create a new query from it
|
||||
*/
|
||||
public static final String SPELLCHECK_COLLATE = SPELLCHECK_PREFIX + "collate";
|
||||
|
||||
/**
|
||||
* Certain spelling implementations may allow for an accuracy setting.
|
||||
*/
|
||||
public static final String SPELLCHECK_ACCURACY = SPELLCHECK_PREFIX + "accuracy";
|
||||
}
|
||||
|
|
|
@ -23,11 +23,13 @@ import java.util.*;
|
|||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||
import org.apache.lucene.search.spell.SpellChecker;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.solr.client.solrj.response.SpellCheckResponse;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -144,8 +146,12 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
NamedList response = new SimpleOrderedMap();
|
||||
IndexReader reader = rb.req.getSearcher().getReader();
|
||||
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
|
||||
SpellingResult spellingResult = spellChecker.getSuggestions(tokens,
|
||||
reader, count, onlyMorePopular, extendedResults);
|
||||
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
|
||||
SolrParams customParams = getCustomParams(getDictionaryName(params), params);
|
||||
SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
|
||||
accuracy, customParams);
|
||||
|
||||
SpellingResult spellingResult = spellChecker.getSuggestions(options);
|
||||
if (spellingResult != null) {
|
||||
response.add("suggestions", toNamedList(spellingResult, q,
|
||||
extendedResults, collate));
|
||||
|
@ -159,6 +165,24 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For every param that is of the form "spellcheck.[dictionary name].XXXX=YYYY, add
|
||||
* XXXX=YYYY as a param to the custom param list
|
||||
* @param params The original SolrParams
|
||||
* @return The new Params
|
||||
*/
|
||||
protected SolrParams getCustomParams(String dictionary, SolrParams params) {
|
||||
ModifiableSolrParams result = new ModifiableSolrParams();
|
||||
Iterator<String> iter = params.getParameterNamesIterator();
|
||||
String prefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + ".";
|
||||
while (iter.hasNext()){
|
||||
String nxt = iter.next();
|
||||
if (nxt.startsWith(prefix)){
|
||||
result.add(nxt.substring(prefix.length()), params.getParams(nxt));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
|
@ -341,11 +365,15 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
|
||||
protected SolrSpellChecker getSpellChecker(SolrParams params) {
|
||||
return spellCheckers.get(getDictionaryName(params));
|
||||
}
|
||||
|
||||
private String getDictionaryName(SolrParams params) {
|
||||
String dictName = params.get(SPELLCHECK_DICT);
|
||||
if (dictName == null) {
|
||||
dictName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
|
||||
}
|
||||
return spellCheckers.get(dictName);
|
||||
return dictName;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -150,29 +150,30 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
|||
return name;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public SpellingResult getSuggestions(Collection<Token> tokens,
|
||||
IndexReader reader, int count, boolean onlyMorePopular,
|
||||
boolean extendedResults)
|
||||
throws IOException {
|
||||
SpellingResult result = new SpellingResult(tokens);
|
||||
reader = determineReader(reader);
|
||||
@Override
|
||||
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
|
||||
SpellingResult result = new SpellingResult(options.tokens);
|
||||
IndexReader reader = determineReader(options.reader);
|
||||
Term term = field != null ? new Term(field, "") : null;
|
||||
for (Token token : tokens) {
|
||||
float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
|
||||
|
||||
int count = (int) Math.max(options.count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
|
||||
for (Token token : options.tokens) {
|
||||
String tokenText = new String(token.buffer(), 0, token.length());
|
||||
String[] suggestions = spellChecker.suggestSimilar(tokenText, (int) Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT),
|
||||
String[] suggestions = spellChecker.suggestSimilar(tokenText,
|
||||
count,
|
||||
field != null ? reader : null, //workaround LUCENE-1295
|
||||
field,
|
||||
onlyMorePopular);
|
||||
options.onlyMorePopular, theAccuracy);
|
||||
if (suggestions.length == 1 && suggestions[0].equals(tokenText)) {
|
||||
//These are spelled the same, continue on
|
||||
continue;
|
||||
}
|
||||
|
||||
if (extendedResults == true && reader != null && field != null) {
|
||||
if (options.extendedResults == true && reader != null && field != null) {
|
||||
term = term.createTerm(tokenText);
|
||||
result.add(token, reader.docFreq(term));
|
||||
int countLimit = Math.min(count, suggestions.length);
|
||||
int countLimit = Math.min(options.count, suggestions.length);
|
||||
for (int i = 0; i < countLimit; i++) {
|
||||
term = term.createTerm(suggestions[i]);
|
||||
result.add(token, suggestions[i], reader.docFreq(term));
|
||||
|
@ -180,8 +181,8 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
|||
} else {
|
||||
if (suggestions.length > 0) {
|
||||
List<String> suggList = Arrays.asList(suggestions);
|
||||
if (suggestions.length > count) {
|
||||
suggList = suggList.subList(0, count);
|
||||
if (suggestions.length > options.count) {
|
||||
suggList = suggList.subList(0, options.count);
|
||||
}
|
||||
result.add(token, suggList);
|
||||
}
|
||||
|
|
|
@ -70,46 +70,15 @@ public abstract class SolrSpellChecker {
|
|||
*/
|
||||
public abstract void build(SolrCore core, SolrIndexSearcher searcher);
|
||||
|
||||
/**
|
||||
* Assumes count = 1, onlyMorePopular = false, extendedResults = false
|
||||
*
|
||||
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
|
||||
*/
|
||||
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader) throws IOException {
|
||||
return getSuggestions(tokens, reader, 1, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Assumes onlyMorePopular = false, extendedResults = false
|
||||
*
|
||||
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
|
||||
*/
|
||||
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count) throws IOException {
|
||||
return getSuggestions(tokens, reader, count, false, false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Assumes count = 1.
|
||||
*
|
||||
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
|
||||
*/
|
||||
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, boolean onlyMorePopular, boolean extendedResults) throws IOException {
|
||||
return getSuggestions(tokens, reader, 1, onlyMorePopular, extendedResults);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get suggestions for the given query. Tokenizes the query using a field appropriate Analyzer.
|
||||
* The {@link SpellingResult#getSuggestions()} suggestions must be ordered by best suggestion first.
|
||||
* <p/>
|
||||
*
|
||||
* @param tokens The Tokens to be spell checked.
|
||||
* @param reader The (optional) IndexReader. If there is not IndexReader, than extendedResults are not possible
|
||||
* @param count The maximum number of suggestions to return
|
||||
* @param onlyMorePopular TODO
|
||||
* @param extendedResults TODO
|
||||
* @throws IOException
|
||||
* @param options The {@link SpellingOptions} to use
|
||||
* @return The {@link SpellingResult} suggestions
|
||||
* @throws IOException if there is an error producing suggestions
|
||||
*/
|
||||
public abstract SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count,
|
||||
boolean onlyMorePopular, boolean extendedResults)
|
||||
throws IOException;
|
||||
public abstract SpellingResult getSuggestions(SpellingOptions options) throws IOException;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
package org.apache.solr.spelling;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
||||
import java.util.Collection;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
**/
|
||||
public class SpellingOptions {
|
||||
|
||||
/**
|
||||
* The tokens to spell check
|
||||
*/
|
||||
public Collection<Token> tokens;
|
||||
/**
|
||||
* An optional {@link org.apache.lucene.index.IndexReader}
|
||||
*/
|
||||
public IndexReader reader;
|
||||
/**
|
||||
* The number of suggestions to return, if there are any. Defaults to 1.
|
||||
*/
|
||||
public int count = 1;
|
||||
/**
|
||||
* Return only those results that are more popular, as defined by the implementation
|
||||
*/
|
||||
public boolean onlyMorePopular;
|
||||
/**
|
||||
* Provide additional, per implementation, information about the results
|
||||
*/
|
||||
public boolean extendedResults;
|
||||
|
||||
/**
|
||||
* Optionally restrict the results to have a minimum accuracy level. Per Implementation.
|
||||
* By default set to Float.MIN_VALUE.
|
||||
*/
|
||||
public float accuracy = Float.MIN_VALUE;
|
||||
|
||||
/**
|
||||
* Any other custom params can be passed through. May be null and is null by default.
|
||||
*/
|
||||
public SolrParams customParams;
|
||||
|
||||
public SpellingOptions() {
|
||||
}
|
||||
|
||||
//A couple of convenience ones
|
||||
public SpellingOptions(Collection<Token> tokens, int count) {
|
||||
this.tokens = tokens;
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
public SpellingOptions(Collection<Token> tokens, IndexReader reader) {
|
||||
this.tokens = tokens;
|
||||
this.reader = reader;
|
||||
}
|
||||
|
||||
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count) {
|
||||
this.tokens = tokens;
|
||||
this.reader = reader;
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
|
||||
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults, float accuracy, SolrParams customParams) {
|
||||
this.tokens = tokens;
|
||||
this.reader = reader;
|
||||
this.count = count;
|
||||
this.onlyMorePopular = onlyMorePopular;
|
||||
this.extendedResults = extendedResults;
|
||||
this.accuracy = accuracy;
|
||||
this.customParams = customParams;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.spelling.SolrSpellChecker;
|
||||
import org.apache.solr.spelling.SpellingOptions;
|
||||
import org.apache.solr.spelling.SpellingResult;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A Dummy SpellChecker for testing purposes
|
||||
*
|
||||
**/
|
||||
public class DummyCustomParamSpellChecker extends SolrSpellChecker {
|
||||
|
||||
@Override
|
||||
public void reload() throws IOException {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void build(SolrCore core, SolrIndexSearcher searcher) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
|
||||
|
||||
SpellingResult result = new SpellingResult();
|
||||
//just spit back out the results
|
||||
Iterator<String> iterator = options.customParams.getParameterNamesIterator();
|
||||
int i = 0;
|
||||
while (iterator.hasNext()){
|
||||
String name = iterator.next();
|
||||
String value = options.customParams.get(name);
|
||||
result.add(new Token(name, i++, i++), Collections.singletonList(value));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -24,6 +24,7 @@ import org.apache.solr.SolrTestCaseJ4;
|
|||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SpellingParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
@ -33,7 +34,6 @@ import org.apache.solr.request.SolrRequestHandler;
|
|||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
||||
import org.apache.solr.spelling.IndexBasedSpellChecker;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -133,9 +133,9 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
|||
assertTrue(cmdExec + " is not equal to " + "build",
|
||||
cmdExec.equals("build") == true);
|
||||
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
||||
assertTrue("spellCheck is null and it shouldn't be", spellCheck != null);
|
||||
assertNotNull(spellCheck);
|
||||
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
assertTrue("suggestions is null and it shouldn't be", suggestions != null);
|
||||
assertNotNull(suggestions);
|
||||
NamedList document = (NamedList) suggestions.get("documemt");
|
||||
assertEquals(1, document.get("numFound"));
|
||||
assertEquals(0, document.get("startOffset"));
|
||||
|
@ -145,6 +145,50 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
|||
assertEquals("document", theSuggestion.iterator().next());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPerDictionary() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
SearchComponent speller = core.getSearchComponent("spellcheck");
|
||||
assertTrue("speller is null and it shouldn't be", speller != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CommonParams.QT, "spellCheckCompRH");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
|
||||
params.add(CommonParams.Q, "documemt");
|
||||
params.add(SpellCheckComponent.COMPONENT_NAME, "true");
|
||||
params.add(SpellingParams.SPELLCHECK_DICT, "perDict");
|
||||
|
||||
params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.foo", "bar");
|
||||
params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.bar", "foo");
|
||||
|
||||
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
|
||||
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
NamedList values = rsp.getValues();
|
||||
|
||||
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
||||
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
assertNotNull("suggestions", suggestions);
|
||||
NamedList suggestion;
|
||||
Collection<String> theSuggestion;
|
||||
suggestion = (NamedList) suggestions.get("foo");
|
||||
assertEquals(1, suggestion.get("numFound"));
|
||||
assertEquals(0, suggestion.get("startOffset"));
|
||||
assertEquals(suggestion.get("endOffset"), 1);
|
||||
theSuggestion = (Collection<String>) suggestion.get("suggestion");
|
||||
assertEquals(1, theSuggestion.size());
|
||||
assertEquals("bar", theSuggestion.iterator().next());
|
||||
|
||||
suggestion = (NamedList) suggestions.get("bar");
|
||||
assertEquals(1, suggestion.get("numFound"));
|
||||
assertEquals(2, suggestion.get("startOffset"));
|
||||
assertEquals(3, suggestion.get("endOffset"));
|
||||
theSuggestion = (Collection<String>) suggestion.get("suggestion");
|
||||
assertEquals(1, theSuggestion.size());
|
||||
assertEquals("foo", theSuggestion.iterator().next());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollate() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
|
|
|
@ -80,15 +80,16 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
|
||||
IndexReader reader = core.getSearcher().get().getReader();
|
||||
Collection<Token> tokens = queryConverter.convert("fob");
|
||||
SpellingResult result = checker.getSuggestions(tokens, reader);
|
||||
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
|
||||
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
||||
assertTrue(entry.getKey() + " is not equal to " + "foo", entry.getKey().equals("foo") == true);
|
||||
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
||||
|
||||
tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(tokens, reader);
|
||||
spellOpts.tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
|
@ -118,7 +119,9 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
|
||||
IndexReader reader = core.getSearcher().get().getReader();
|
||||
Collection<Token> tokens = queryConverter.convert("Solar");
|
||||
SpellingResult result = checker.getSuggestions(tokens, reader);
|
||||
|
||||
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
|
||||
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||
|
@ -128,8 +131,8 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
||||
|
||||
//test something not in the spell checker
|
||||
tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(tokens, reader);
|
||||
spellOpts.tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
|
@ -160,7 +163,8 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
|
||||
IndexReader reader = core.getSearcher().get().getReader();
|
||||
Collection<Token> tokens = queryConverter.convert("solar");
|
||||
SpellingResult result = checker.getSuggestions(tokens, reader);
|
||||
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
|
||||
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||
|
@ -170,10 +174,10 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
||||
|
||||
|
||||
tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(tokens, reader);
|
||||
spellOpts.tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -125,10 +125,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
|
||||
IndexReader reader = searcher.getReader();
|
||||
Collection<Token> tokens = queryConverter.convert("documemt");
|
||||
SpellingResult result = checker.getSuggestions(tokens, reader);
|
||||
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
|
||||
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("documemt is null and it shouldn't be", suggestions != null);
|
||||
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
||||
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
||||
|
@ -136,32 +137,33 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
||||
|
||||
//test something not in the spell checker
|
||||
tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(tokens, reader);
|
||||
spellOpts.tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
|
||||
//test something that is spelled correctly
|
||||
tokens = queryConverter.convert("document");
|
||||
result = checker.getSuggestions(tokens, reader);
|
||||
spellOpts.tokens = queryConverter.convert("document");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("suggestions is null and it shouldn't be", suggestions == null);
|
||||
|
||||
//Has multiple possibilities, but the exact exists, so that should be returned
|
||||
tokens = queryConverter.convert("red");
|
||||
result = checker.getSuggestions(tokens, reader, 2);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
spellOpts.tokens = queryConverter.convert("red");
|
||||
spellOpts.count = 2;
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertNotNull(result);
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
|
||||
//Try out something which should have multiple suggestions
|
||||
tokens = queryConverter.convert("bug");
|
||||
result = checker.getSuggestions(tokens, reader, 2);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
assertTrue("suggestions is null and it shouldn't be", suggestions != null);
|
||||
spellOpts.tokens = queryConverter.convert("bug");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertNotNull(result);
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertNotNull(suggestions);
|
||||
assertTrue("suggestions Size: " + suggestions.size() + " is not: " + 2, suggestions.size() == 2);
|
||||
|
||||
entry = suggestions.entrySet().iterator().next();
|
||||
|
@ -198,10 +200,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
|
||||
IndexReader reader = searcher.getReader();
|
||||
Collection<Token> tokens = queryConverter.convert("documemt");
|
||||
SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true);
|
||||
SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null);
|
||||
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("documemt is null and it shouldn't be", suggestions != null);
|
||||
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
||||
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
||||
|
@ -209,16 +212,16 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2);
|
||||
|
||||
//test something not in the spell checker
|
||||
tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(tokens, reader, 1, false, true);
|
||||
spellOpts.tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
|
||||
tokens = queryConverter.convert("document");
|
||||
result = checker.getSuggestions(tokens, reader, 1, false, true);
|
||||
spellOpts.tokens = queryConverter.convert("document");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
} finally {
|
||||
holder.decref();
|
||||
|
@ -304,10 +307,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
|
||||
IndexReader reader = searcher.getReader();
|
||||
Collection<Token> tokens = queryConverter.convert("flesh");
|
||||
SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true);
|
||||
SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null);
|
||||
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("flesh is null and it shouldn't be", suggestions != null);
|
||||
assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
||||
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
||||
|
@ -315,16 +319,16 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
|
||||
|
||||
//test something not in the spell checker
|
||||
tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(tokens, reader, 1, false, true);
|
||||
spellOpts.tokens = queryConverter.convert("super");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
|
||||
tokens = queryConverter.convert("Caroline");
|
||||
result = checker.getSuggestions(tokens, reader, 1, false, true);
|
||||
spellOpts.tokens = queryConverter.convert("Caroline");
|
||||
result = checker.getSuggestions(spellOpts);
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
suggestions = result.get(tokens.iterator().next());
|
||||
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
} finally {
|
||||
holder.decref();
|
||||
|
|
|
@ -377,7 +377,11 @@
|
|||
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
|
||||
<lst name="spellchecker">
|
||||
<str name="name">perDict</str>
|
||||
<str name="classname">org.apache.solr.handler.component.DummyCustomParamSpellChecker</str>
|
||||
<str name="field">lowerfilt</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
|
||||
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
|
||||
|
|
Loading…
Reference in New Issue