mirror of https://github.com/apache/lucene.git
LUCENE-2608: Add per-method and request accuracy to spell checker
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@987179 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4ec28930c9
commit
1620366b5c
|
@ -23,6 +23,9 @@ New Features
|
||||||
* LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
|
* LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
|
||||||
with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll)
|
with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll)
|
||||||
|
|
||||||
|
* LUCENE-2608: Added the ability to specify the accuracy per method call in the SpellChecker. The per-class
|
||||||
|
method is also still available. (Grant Ingersoll)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-2606: Changed RegexCapabilities interface to fix thread
|
* LUCENE-2606: Changed RegexCapabilities interface to fix thread
|
||||||
|
|
|
@ -62,11 +62,16 @@ import org.apache.lucene.util.Version;
|
||||||
*/
|
*/
|
||||||
public class SpellChecker implements java.io.Closeable {
|
public class SpellChecker implements java.io.Closeable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The default minimum score to use, if not specified by calling {@link #setAccuracy(float)}.
|
||||||
|
*/
|
||||||
|
public static final float DEFAULT_ACCURACY = 0.5f;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Field name for each word in the ngram index.
|
* Field name for each word in the ngram index.
|
||||||
*/
|
*/
|
||||||
public static final String F_WORD = "word";
|
public static final String F_WORD = "word";
|
||||||
|
|
||||||
private static final Term F_WORD_TERM = new Term(F_WORD);
|
private static final Term F_WORD_TERM = new Term(F_WORD);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -75,35 +80,34 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
// don't modify the directory directly - see #swapSearcher()
|
// don't modify the directory directly - see #swapSearcher()
|
||||||
// TODO: why is this package private?
|
// TODO: why is this package private?
|
||||||
Directory spellIndex;
|
Directory spellIndex;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Boost value for start and end grams
|
* Boost value for start and end grams
|
||||||
*/
|
*/
|
||||||
private float bStart = 2.0f;
|
private float bStart = 2.0f;
|
||||||
private float bEnd = 1.0f;
|
|
||||||
|
|
||||||
|
private float bEnd = 1.0f;
|
||||||
// don't use this searcher directly - see #swapSearcher()
|
// don't use this searcher directly - see #swapSearcher()
|
||||||
|
|
||||||
private IndexSearcher searcher;
|
private IndexSearcher searcher;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this locks all modifications to the current searcher.
|
* this locks all modifications to the current searcher.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private final Object searcherLock = new Object();
|
private final Object searcherLock = new Object();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this lock synchronizes all possible modifications to the
|
* this lock synchronizes all possible modifications to the
|
||||||
* current index directory. It should not be possible to try modifying
|
* current index directory. It should not be possible to try modifying
|
||||||
* the same index concurrently. Note: Do not acquire the searcher lock
|
* the same index concurrently. Note: Do not acquire the searcher lock
|
||||||
* before acquiring this lock!
|
* before acquiring this lock!
|
||||||
*/
|
*/
|
||||||
private final Object modifyCurrentIndexLock = new Object();
|
private final Object modifyCurrentIndexLock = new Object();
|
||||||
|
|
||||||
private volatile boolean closed = false;
|
private volatile boolean closed = false;
|
||||||
|
|
||||||
// minimum score for hits generated by the spell checker query
|
// minimum score for hits generated by the spell checker query
|
||||||
private float minScore = 0.5f;
|
|
||||||
|
|
||||||
private StringDistance sd;
|
|
||||||
|
|
||||||
|
private float accuracy = DEFAULT_ACCURACY;
|
||||||
|
|
||||||
|
private StringDistance sd;
|
||||||
private Comparator<SuggestWord> comparator;
|
private Comparator<SuggestWord> comparator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -202,10 +206,20 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the accuracy 0 < minScore < 1; default 0.5
|
* Sets the accuracy (minimum score), {@code 0 < acc < 1}; default {@link #DEFAULT_ACCURACY}
|
||||||
|
* @param acc The new accuracy
|
||||||
*/
|
*/
|
||||||
public void setAccuracy(float minScore) {
|
public void setAccuracy(float acc) {
|
||||||
this.minScore = minScore;
|
this.accuracy = acc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The accuracy (minimum score) to be used, unless overridden in {@link #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)}, to
|
||||||
|
* decide whether a suggestion is included or not.
|
||||||
|
* @return The current accuracy setting
|
||||||
|
*/
|
||||||
|
public float getAccuracy() {
|
||||||
|
return accuracy;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -224,11 +238,37 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
* @throws IOException if the underlying index throws an {@link IOException}
|
* @throws IOException if the underlying index throws an {@link IOException}
|
||||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||||
* @return String[]
|
* @return String[]
|
||||||
|
*
|
||||||
|
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||||
*/
|
*/
|
||||||
public String[] suggestSimilar(String word, int numSug) throws IOException {
|
public String[] suggestSimilar(String word, int numSug) throws IOException {
|
||||||
return this.suggestSimilar(word, numSug, null, null, false);
|
return this.suggestSimilar(word, numSug, null, null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suggest similar words.
|
||||||
|
*
|
||||||
|
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
|
||||||
|
* is not the same as the edit distance strategy used to calculate the best
|
||||||
|
* matching spell-checked word from the hits that Lucene found, one usually has
|
||||||
|
* to retrieve a couple of numSug's in order to get the true best match.
|
||||||
|
*
|
||||||
|
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
|
||||||
|
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
|
||||||
|
*
|
||||||
|
* @param word the word you want a spell check done on
|
||||||
|
* @param numSug the number of suggested words
|
||||||
|
* @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
|
||||||
|
* @throws IOException if the underlying index throws an {@link IOException}
|
||||||
|
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||||
|
* @return String[]
|
||||||
|
*
|
||||||
|
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||||
|
*/
|
||||||
|
public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException {
|
||||||
|
return this.suggestSimilar(word, numSug, null, null, false, accuracy);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Suggest similar words (optionally restricted to a field of an index).
|
* Suggest similar words (optionally restricted to a field of an index).
|
||||||
*
|
*
|
||||||
|
@ -240,6 +280,40 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
|
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
|
||||||
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
|
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
|
||||||
*
|
*
|
||||||
|
* <p>Uses the current {@link #getAccuracy()} value (see {@link #setAccuracy(float)}) as the accuracy.
|
||||||
|
*
|
||||||
|
* @param word the word you want a spell check done on
|
||||||
|
* @param numSug the number of suggested words
|
||||||
|
* @param ir the indexReader of the user index (can be null see field param)
|
||||||
|
* @param field the field of the user index: if field is not null, the suggested
|
||||||
|
* words are restricted to the words present in this field.
|
||||||
|
* @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
|
||||||
|
(only if restricted mode = (indexReader!=null and field!=null))
|
||||||
|
* @throws IOException if the underlying index throws an {@link IOException}
|
||||||
|
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||||
|
* @return String[] the sorted list of the suggest words with these 2 criteria:
|
||||||
|
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
|
||||||
|
* of the suggest words in the field of the user index
|
||||||
|
*
|
||||||
|
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||||
|
*/
|
||||||
|
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||||
|
String field, boolean morePopular) throws IOException {
|
||||||
|
return suggestSimilar(word, numSug, ir, field, morePopular, accuracy);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suggest similar words (optionally restricted to a field of an index).
|
||||||
|
*
|
||||||
|
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
|
||||||
|
* is not the same as the edit distance strategy used to calculate the best
|
||||||
|
* matching spell-checked word from the hits that Lucene found, one usually has
|
||||||
|
* to retrieve a couple of numSug's in order to get the true best match.
|
||||||
|
*
|
||||||
|
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
|
||||||
|
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
|
||||||
|
*
|
||||||
* @param word the word you want a spell check done on
|
* @param word the word you want a spell check done on
|
||||||
* @param numSug the number of suggested words
|
* @param numSug the number of suggested words
|
||||||
* @param ir the indexReader of the user index (can be null see field param)
|
* @param ir the indexReader of the user index (can be null see field param)
|
||||||
|
@ -247,6 +321,7 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
* words are restricted to the words present in this field.
|
* words are restricted to the words present in this field.
|
||||||
* @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
|
* @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
|
||||||
* (only if restricted mode = (indexReader!=null and field!=null)
|
* (only if restricted mode = (indexReader!=null and field!=null)
|
||||||
|
* @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
|
||||||
* @throws IOException if the underlying index throws an {@link IOException}
|
* @throws IOException if the underlying index throws an {@link IOException}
|
||||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||||
* @return String[] the sorted list of the suggest words with these 2 criteria:
|
* @return String[] the sorted list of the suggest words with these 2 criteria:
|
||||||
|
@ -254,72 +329,72 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
* of the suggest words in the field of the user index
|
* of the suggest words in the field of the user index
|
||||||
*/
|
*/
|
||||||
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||||
String field, boolean morePopular) throws IOException {
|
String field, boolean morePopular, float accuracy) throws IOException {
|
||||||
// obtainSearcher calls ensureOpen
|
// obtainSearcher calls ensureOpen
|
||||||
final IndexSearcher indexSearcher = obtainSearcher();
|
final IndexSearcher indexSearcher = obtainSearcher();
|
||||||
try{
|
try{
|
||||||
float min = this.minScore;
|
|
||||||
final int lengthWord = word.length();
|
final int lengthWord = word.length();
|
||||||
|
|
||||||
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
|
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
|
||||||
final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
|
final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
|
||||||
// if the word exists in the real index and we don't care for word frequency, return the word itself
|
// if the word exists in the real index and we don't care for word frequency, return the word itself
|
||||||
if (!morePopular && freq > 0) {
|
if (!morePopular && freq > 0) {
|
||||||
return new String[] { word };
|
return new String[] { word };
|
||||||
}
|
}
|
||||||
|
|
||||||
BooleanQuery query = new BooleanQuery();
|
BooleanQuery query = new BooleanQuery();
|
||||||
String[] grams;
|
String[] grams;
|
||||||
String key;
|
String key;
|
||||||
|
|
||||||
for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
|
for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
|
||||||
|
|
||||||
key = "gram" + ng; // form key
|
key = "gram" + ng; // form key
|
||||||
|
|
||||||
grams = formGrams(word, ng); // form word into ngrams (allow dups too)
|
grams = formGrams(word, ng); // form word into ngrams (allow dups too)
|
||||||
|
|
||||||
if (grams.length == 0) {
|
if (grams.length == 0) {
|
||||||
continue; // hmm
|
continue; // hmm
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bStart > 0) { // should we boost prefixes?
|
if (bStart > 0) { // should we boost prefixes?
|
||||||
add(query, "start" + ng, grams[0], bStart); // matches start of word
|
add(query, "start" + ng, grams[0], bStart); // matches start of word
|
||||||
|
|
||||||
}
|
}
|
||||||
if (bEnd > 0) { // should we boost suffixes
|
if (bEnd > 0) { // should we boost suffixes
|
||||||
add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
|
add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
|
||||||
|
|
||||||
}
|
}
|
||||||
for (int i = 0; i < grams.length; i++) {
|
for (int i = 0; i < grams.length; i++) {
|
||||||
add(query, key, grams[i]);
|
add(query, key, grams[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int maxHits = 10 * numSug;
|
int maxHits = 10 * numSug;
|
||||||
|
|
||||||
// System.out.println("Q: " + query);
|
// System.out.println("Q: " + query);
|
||||||
ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
|
ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
|
||||||
// System.out.println("HITS: " + hits.length());
|
// System.out.println("HITS: " + hits.length());
|
||||||
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
|
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
|
||||||
|
|
||||||
// go thru more than 'maxr' matches in case the distance filter triggers
|
// go thru more than 'maxr' matches in case the distance filter triggers
|
||||||
int stop = Math.min(hits.length, maxHits);
|
int stop = Math.min(hits.length, maxHits);
|
||||||
SuggestWord sugWord = new SuggestWord();
|
SuggestWord sugWord = new SuggestWord();
|
||||||
for (int i = 0; i < stop; i++) {
|
for (int i = 0; i < stop; i++) {
|
||||||
|
|
||||||
sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
|
sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
|
||||||
|
|
||||||
// don't suggest a word for itself, that would be silly
|
// don't suggest a word for itself, that would be silly
|
||||||
if (sugWord.string.equals(word)) {
|
if (sugWord.string.equals(word)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// edit distance
|
// edit distance
|
||||||
sugWord.score = sd.getDistance(word,sugWord.string);
|
sugWord.score = sd.getDistance(word,sugWord.string);
|
||||||
if (sugWord.score < min) {
|
if (sugWord.score < accuracy) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ir != null && field != null) { // use the user index
|
if (ir != null && field != null) { // use the user index
|
||||||
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
|
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
|
||||||
// don't suggest a word that is not present in the field
|
// don't suggest a word that is not present in the field
|
||||||
|
@ -330,23 +405,22 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
sugQueue.insertWithOverflow(sugWord);
|
sugQueue.insertWithOverflow(sugWord);
|
||||||
if (sugQueue.size() == numSug) {
|
if (sugQueue.size() == numSug) {
|
||||||
// if queue full, maintain the minScore score
|
// if queue full, maintain the minScore score
|
||||||
min = sugQueue.top().score;
|
accuracy = sugQueue.top().score;
|
||||||
}
|
}
|
||||||
sugWord = new SuggestWord();
|
sugWord = new SuggestWord();
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert to array string
|
// convert to array string
|
||||||
String[] list = new String[sugQueue.size()];
|
String[] list = new String[sugQueue.size()];
|
||||||
for (int i = sugQueue.size() - 1; i >= 0; i--) {
|
for (int i = sugQueue.size() - 1; i >= 0; i--) {
|
||||||
list[i] = sugQueue.pop().string;
|
list[i] = sugQueue.pop().string;
|
||||||
}
|
}
|
||||||
|
|
||||||
return list;
|
return list;
|
||||||
} finally {
|
} finally {
|
||||||
releaseSearcher(indexSearcher);
|
releaseSearcher(indexSearcher);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a clause to a boolean query.
|
* Add a clause to a boolean query.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -104,11 +104,21 @@ public class TestSpellChecker extends LuceneTestCase {
|
||||||
spellChecker.setAccuracy(0.8f);
|
spellChecker.setAccuracy(0.8f);
|
||||||
checkCommonSuggestions(r);
|
checkCommonSuggestions(r);
|
||||||
checkJaroWinklerSuggestions();
|
checkJaroWinklerSuggestions();
|
||||||
|
// the accuracy was set to 0.8 above via setAccuracy, but the best result has a score of 0.925
|
||||||
|
String[] similar = spellChecker.suggestSimilar("fvie", 2, 0.93f);
|
||||||
|
assertTrue(similar.length == 0);
|
||||||
|
similar = spellChecker.suggestSimilar("fvie", 2, 0.92f);
|
||||||
|
assertTrue(similar.length == 1);
|
||||||
|
|
||||||
|
similar = spellChecker.suggestSimilar("fiv", 2);
|
||||||
|
assertTrue(similar.length > 0);
|
||||||
|
assertEquals(similar[0], "five");
|
||||||
|
|
||||||
spellChecker.setStringDistance(new NGramDistance(2));
|
spellChecker.setStringDistance(new NGramDistance(2));
|
||||||
spellChecker.setAccuracy(0.5f);
|
spellChecker.setAccuracy(0.5f);
|
||||||
checkCommonSuggestions(r);
|
checkCommonSuggestions(r);
|
||||||
checkNGramSuggestions();
|
checkNGramSuggestions();
|
||||||
|
|
||||||
r.close();
|
r.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,8 +137,6 @@ public class TestSpellChecker extends LuceneTestCase {
|
||||||
if (!compareSP.isClosed())
|
if (!compareSP.isClosed())
|
||||||
compareSP.close();
|
compareSP.close();
|
||||||
compIdx.close();
|
compIdx.close();
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkCommonSuggestions(IndexReader r) throws IOException {
|
private void checkCommonSuggestions(IndexReader r) throws IOException {
|
||||||
|
|
|
@ -77,6 +77,12 @@ Upgrading from Solr 1.4
|
||||||
legacy behavior should set a default value for the 'mm' param in
|
legacy behavior should set a default value for the 'mm' param in
|
||||||
their solrconfig.xml file.
|
their solrconfig.xml file.
|
||||||
|
|
||||||
|
* LUCENE-2608: Added the ability to specify the accuracy on a per request basis.
|
||||||
|
Implementations of SolrSpellChecker must change over to the new SolrSpellChecker
|
||||||
|
abstract methods using the new SpellingOptions class. While this change is not
|
||||||
|
backward compatible, it should be trivial to migrate as the SpellingOptions class
|
||||||
|
just encapsulates the parameters that were passed in to the methods before the change. (gsingers)
|
||||||
|
|
||||||
Detailed Change List
|
Detailed Change List
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
@ -226,11 +232,6 @@ New Features
|
||||||
|
|
||||||
* SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers)
|
* SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers)
|
||||||
|
|
||||||
* SOLR-2049: Add hl.multiValuedSeparatorChar for FastVectorHighlighter, per LUCENE-2603. (koji)
|
|
||||||
|
|
||||||
* SOLR-1881: add a url-scheme config string to SearchHandler to specify alternate
|
|
||||||
URL prefixes for distributed search shard requests. (Sami Siren via yonik)
|
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -81,4 +81,9 @@ public interface SpellingParams {
|
||||||
* Take the top suggestion for each token and create a new query from it
|
* Take the top suggestion for each token and create a new query from it
|
||||||
*/
|
*/
|
||||||
public static final String SPELLCHECK_COLLATE = SPELLCHECK_PREFIX + "collate";
|
public static final String SPELLCHECK_COLLATE = SPELLCHECK_PREFIX + "collate";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Certain spelling implementations may allow for an accuracy setting.
|
||||||
|
*/
|
||||||
|
public static final String SPELLCHECK_ACCURACY = SPELLCHECK_PREFIX + "accuracy";
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,11 +23,13 @@ import java.util.*;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||||
|
import org.apache.lucene.search.spell.SpellChecker;
|
||||||
import org.apache.lucene.search.spell.StringDistance;
|
import org.apache.lucene.search.spell.StringDistance;
|
||||||
import org.apache.lucene.search.spell.SuggestWord;
|
import org.apache.lucene.search.spell.SuggestWord;
|
||||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
import org.apache.solr.client.solrj.response.SpellCheckResponse;
|
import org.apache.solr.client.solrj.response.SpellCheckResponse;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -144,8 +146,12 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
NamedList response = new SimpleOrderedMap();
|
NamedList response = new SimpleOrderedMap();
|
||||||
IndexReader reader = rb.req.getSearcher().getReader();
|
IndexReader reader = rb.req.getSearcher().getReader();
|
||||||
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
|
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
|
||||||
SpellingResult spellingResult = spellChecker.getSuggestions(tokens,
|
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
|
||||||
reader, count, onlyMorePopular, extendedResults);
|
SolrParams customParams = getCustomParams(getDictionaryName(params), params);
|
||||||
|
SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
|
||||||
|
accuracy, customParams);
|
||||||
|
|
||||||
|
SpellingResult spellingResult = spellChecker.getSuggestions(options);
|
||||||
if (spellingResult != null) {
|
if (spellingResult != null) {
|
||||||
response.add("suggestions", toNamedList(spellingResult, q,
|
response.add("suggestions", toNamedList(spellingResult, q,
|
||||||
extendedResults, collate));
|
extendedResults, collate));
|
||||||
|
@ -159,6 +165,24 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For every param that is of the form "spellcheck.[dictionary name].XXXX=YYYY", add
|
||||||
|
* XXXX=YYYY as a param to the custom param list
|
||||||
|
* @param params The original SolrParams
|
||||||
|
* @return The new Params
|
||||||
|
*/
|
||||||
|
protected SolrParams getCustomParams(String dictionary, SolrParams params) {
|
||||||
|
ModifiableSolrParams result = new ModifiableSolrParams();
|
||||||
|
Iterator<String> iter = params.getParameterNamesIterator();
|
||||||
|
String prefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + ".";
|
||||||
|
while (iter.hasNext()){
|
||||||
|
String nxt = iter.next();
|
||||||
|
if (nxt.startsWith(prefix)){
|
||||||
|
result.add(nxt.substring(prefix.length()), params.getParams(nxt));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -341,13 +365,17 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
}
|
}
|
||||||
|
|
||||||
protected SolrSpellChecker getSpellChecker(SolrParams params) {
|
protected SolrSpellChecker getSpellChecker(SolrParams params) {
|
||||||
|
return spellCheckers.get(getDictionaryName(params));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getDictionaryName(SolrParams params) {
|
||||||
String dictName = params.get(SPELLCHECK_DICT);
|
String dictName = params.get(SPELLCHECK_DICT);
|
||||||
if (dictName == null) {
|
if (dictName == null) {
|
||||||
dictName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
|
dictName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
|
||||||
}
|
}
|
||||||
return spellCheckers.get(dictName);
|
return dictName;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the spellchecker registered to a given name
|
* @return the spellchecker registered to a given name
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -150,29 +150,30 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@Override
|
||||||
public SpellingResult getSuggestions(Collection<Token> tokens,
|
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
|
||||||
IndexReader reader, int count, boolean onlyMorePopular,
|
SpellingResult result = new SpellingResult(options.tokens);
|
||||||
boolean extendedResults)
|
IndexReader reader = determineReader(options.reader);
|
||||||
throws IOException {
|
|
||||||
SpellingResult result = new SpellingResult(tokens);
|
|
||||||
reader = determineReader(reader);
|
|
||||||
Term term = field != null ? new Term(field, "") : null;
|
Term term = field != null ? new Term(field, "") : null;
|
||||||
for (Token token : tokens) {
|
float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
|
||||||
|
|
||||||
|
int count = (int) Math.max(options.count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
|
||||||
|
for (Token token : options.tokens) {
|
||||||
String tokenText = new String(token.buffer(), 0, token.length());
|
String tokenText = new String(token.buffer(), 0, token.length());
|
||||||
String[] suggestions = spellChecker.suggestSimilar(tokenText, (int) Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT),
|
String[] suggestions = spellChecker.suggestSimilar(tokenText,
|
||||||
|
count,
|
||||||
field != null ? reader : null, //workaround LUCENE-1295
|
field != null ? reader : null, //workaround LUCENE-1295
|
||||||
field,
|
field,
|
||||||
onlyMorePopular);
|
options.onlyMorePopular, theAccuracy);
|
||||||
if (suggestions.length == 1 && suggestions[0].equals(tokenText)) {
|
if (suggestions.length == 1 && suggestions[0].equals(tokenText)) {
|
||||||
//These are spelled the same, continue on
|
//These are spelled the same, continue on
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (extendedResults == true && reader != null && field != null) {
|
if (options.extendedResults == true && reader != null && field != null) {
|
||||||
term = term.createTerm(tokenText);
|
term = term.createTerm(tokenText);
|
||||||
result.add(token, reader.docFreq(term));
|
result.add(token, reader.docFreq(term));
|
||||||
int countLimit = Math.min(count, suggestions.length);
|
int countLimit = Math.min(options.count, suggestions.length);
|
||||||
for (int i = 0; i < countLimit; i++) {
|
for (int i = 0; i < countLimit; i++) {
|
||||||
term = term.createTerm(suggestions[i]);
|
term = term.createTerm(suggestions[i]);
|
||||||
result.add(token, suggestions[i], reader.docFreq(term));
|
result.add(token, suggestions[i], reader.docFreq(term));
|
||||||
|
@ -180,8 +181,8 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
||||||
} else {
|
} else {
|
||||||
if (suggestions.length > 0) {
|
if (suggestions.length > 0) {
|
||||||
List<String> suggList = Arrays.asList(suggestions);
|
List<String> suggList = Arrays.asList(suggestions);
|
||||||
if (suggestions.length > count) {
|
if (suggestions.length > options.count) {
|
||||||
suggList = suggList.subList(0, count);
|
suggList = suggList.subList(0, options.count);
|
||||||
}
|
}
|
||||||
result.add(token, suggList);
|
result.add(token, suggList);
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,46 +70,15 @@ public abstract class SolrSpellChecker {
|
||||||
*/
|
*/
|
||||||
public abstract void build(SolrCore core, SolrIndexSearcher searcher);
|
public abstract void build(SolrCore core, SolrIndexSearcher searcher);
|
||||||
|
|
||||||
/**
|
|
||||||
* Assumes count = 1, onlyMorePopular = false, extendedResults = false
|
|
||||||
*
|
|
||||||
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
|
|
||||||
*/
|
|
||||||
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader) throws IOException {
|
|
||||||
return getSuggestions(tokens, reader, 1, false, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Assumes onlyMorePopular = false, extendedResults = false
|
|
||||||
*
|
|
||||||
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
|
|
||||||
*/
|
|
||||||
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count) throws IOException {
|
|
||||||
return getSuggestions(tokens, reader, count, false, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Assumes count = 1.
|
|
||||||
*
|
|
||||||
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
|
|
||||||
*/
|
|
||||||
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, boolean onlyMorePopular, boolean extendedResults) throws IOException {
|
|
||||||
return getSuggestions(tokens, reader, 1, onlyMorePopular, extendedResults);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get suggestions for the given query. Tokenizes the query using a field appropriate Analyzer.
|
* Get suggestions for the given query. Tokenizes the query using a field appropriate Analyzer.
|
||||||
* The {@link SpellingResult#getSuggestions()} suggestions must be ordered by best suggestion first.
|
* The {@link SpellingResult#getSuggestions()} suggestions must be ordered by best suggestion first.
|
||||||
|
* <p/>
|
||||||
*
|
*
|
||||||
* @param tokens The Tokens to be spell checked.
|
* @param options The {@link SpellingOptions} to use
|
||||||
* @param reader The (optional) IndexReader. If there is not IndexReader, than extendedResults are not possible
|
* @return The {@link SpellingResult} suggestions
|
||||||
* @param count The maximum number of suggestions to return
|
* @throws IOException if there is an error producing suggestions
|
||||||
* @param onlyMorePopular TODO
|
|
||||||
* @param extendedResults TODO
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
*/
|
||||||
public abstract SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count,
|
public abstract SpellingResult getSuggestions(SpellingOptions options) throws IOException;
|
||||||
boolean onlyMorePopular, boolean extendedResults)
|
|
||||||
throws IOException;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
package org.apache.solr.spelling;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class SpellingOptions {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The tokens to spell check
|
||||||
|
*/
|
||||||
|
public Collection<Token> tokens;
|
||||||
|
/**
|
||||||
|
* An optional {@link org.apache.lucene.index.IndexReader}
|
||||||
|
*/
|
||||||
|
public IndexReader reader;
|
||||||
|
/**
|
||||||
|
* The number of suggestions to return, if there are any. Defaults to 1.
|
||||||
|
*/
|
||||||
|
public int count = 1;
|
||||||
|
/**
|
||||||
|
* Return only those results that are more popular, as defined by the implementation
|
||||||
|
*/
|
||||||
|
public boolean onlyMorePopular;
|
||||||
|
/**
|
||||||
|
* Provide additional, per implementation, information about the results
|
||||||
|
*/
|
||||||
|
public boolean extendedResults;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Optionally restrict the results to have a minimum accuracy level. Per Implementation.
|
||||||
|
* By default set to Float.MIN_VALUE.
|
||||||
|
*/
|
||||||
|
public float accuracy = Float.MIN_VALUE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Any other custom params can be passed through. May be null and is null by default.
|
||||||
|
*/
|
||||||
|
public SolrParams customParams;
|
||||||
|
|
||||||
|
public SpellingOptions() {
|
||||||
|
}
|
||||||
|
|
||||||
|
//A couple of convenience ones
|
||||||
|
public SpellingOptions(Collection<Token> tokens, int count) {
|
||||||
|
this.tokens = tokens;
|
||||||
|
this.count = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SpellingOptions(Collection<Token> tokens, IndexReader reader) {
|
||||||
|
this.tokens = tokens;
|
||||||
|
this.reader = reader;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count) {
|
||||||
|
this.tokens = tokens;
|
||||||
|
this.reader = reader;
|
||||||
|
this.count = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults, float accuracy, SolrParams customParams) {
|
||||||
|
this.tokens = tokens;
|
||||||
|
this.reader = reader;
|
||||||
|
this.count = count;
|
||||||
|
this.onlyMorePopular = onlyMorePopular;
|
||||||
|
this.extendedResults = extendedResults;
|
||||||
|
this.accuracy = accuracy;
|
||||||
|
this.customParams = customParams;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,61 @@
|
||||||
|
package org.apache.solr.handler.component;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
import org.apache.solr.spelling.SolrSpellChecker;
|
||||||
|
import org.apache.solr.spelling.SpellingOptions;
|
||||||
|
import org.apache.solr.spelling.SpellingResult;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A Dummy SpellChecker for testing purposes
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class DummyCustomParamSpellChecker extends SolrSpellChecker {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reload() throws IOException {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void build(SolrCore core, SolrIndexSearcher searcher) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
|
||||||
|
|
||||||
|
SpellingResult result = new SpellingResult();
|
||||||
|
//just spit back out the results
|
||||||
|
Iterator<String> iterator = options.customParams.getParameterNamesIterator();
|
||||||
|
int i = 0;
|
||||||
|
while (iterator.hasNext()){
|
||||||
|
String name = iterator.next();
|
||||||
|
String value = options.customParams.get(name);
|
||||||
|
result.add(new Token(name, i++, i++), Collections.singletonList(value));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -24,6 +24,7 @@ import org.apache.solr.SolrTestCaseJ4;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.MapSolrParams;
|
import org.apache.solr.common.params.MapSolrParams;
|
||||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
|
import org.apache.solr.common.params.SpellingParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
|
@ -33,7 +34,6 @@ import org.apache.solr.request.SolrRequestHandler;
|
||||||
import org.apache.solr.response.SolrQueryResponse;
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
||||||
import org.apache.solr.spelling.IndexBasedSpellChecker;
|
import org.apache.solr.spelling.IndexBasedSpellChecker;
|
||||||
import org.apache.solr.util.AbstractSolrTestCase;
|
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@ -133,9 +133,9 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
||||||
assertTrue(cmdExec + " is not equal to " + "build",
|
assertTrue(cmdExec + " is not equal to " + "build",
|
||||||
cmdExec.equals("build") == true);
|
cmdExec.equals("build") == true);
|
||||||
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
||||||
assertTrue("spellCheck is null and it shouldn't be", spellCheck != null);
|
assertNotNull(spellCheck);
|
||||||
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
||||||
assertTrue("suggestions is null and it shouldn't be", suggestions != null);
|
assertNotNull(suggestions);
|
||||||
NamedList document = (NamedList) suggestions.get("documemt");
|
NamedList document = (NamedList) suggestions.get("documemt");
|
||||||
assertEquals(1, document.get("numFound"));
|
assertEquals(1, document.get("numFound"));
|
||||||
assertEquals(0, document.get("startOffset"));
|
assertEquals(0, document.get("startOffset"));
|
||||||
|
@ -145,6 +145,50 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
||||||
assertEquals("document", theSuggestion.iterator().next());
|
assertEquals("document", theSuggestion.iterator().next());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPerDictionary() throws Exception {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
SearchComponent speller = core.getSearchComponent("spellcheck");
|
||||||
|
assertTrue("speller is null and it shouldn't be", speller != null);
|
||||||
|
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.add(CommonParams.QT, "spellCheckCompRH");
|
||||||
|
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
|
||||||
|
params.add(CommonParams.Q, "documemt");
|
||||||
|
params.add(SpellCheckComponent.COMPONENT_NAME, "true");
|
||||||
|
params.add(SpellingParams.SPELLCHECK_DICT, "perDict");
|
||||||
|
|
||||||
|
params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.foo", "bar");
|
||||||
|
params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.bar", "foo");
|
||||||
|
|
||||||
|
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
|
||||||
|
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||||
|
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||||
|
NamedList values = rsp.getValues();
|
||||||
|
|
||||||
|
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
||||||
|
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
||||||
|
assertNotNull("suggestions", suggestions);
|
||||||
|
NamedList suggestion;
|
||||||
|
Collection<String> theSuggestion;
|
||||||
|
suggestion = (NamedList) suggestions.get("foo");
|
||||||
|
assertEquals(1, suggestion.get("numFound"));
|
||||||
|
assertEquals(0, suggestion.get("startOffset"));
|
||||||
|
assertEquals(suggestion.get("endOffset"), 1);
|
||||||
|
theSuggestion = (Collection<String>) suggestion.get("suggestion");
|
||||||
|
assertEquals(1, theSuggestion.size());
|
||||||
|
assertEquals("bar", theSuggestion.iterator().next());
|
||||||
|
|
||||||
|
suggestion = (NamedList) suggestions.get("bar");
|
||||||
|
assertEquals(1, suggestion.get("numFound"));
|
||||||
|
assertEquals(2, suggestion.get("startOffset"));
|
||||||
|
assertEquals(3, suggestion.get("endOffset"));
|
||||||
|
theSuggestion = (Collection<String>) suggestion.get("suggestion");
|
||||||
|
assertEquals(1, theSuggestion.size());
|
||||||
|
assertEquals("foo", theSuggestion.iterator().next());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCollate() throws Exception {
|
public void testCollate() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
SolrCore core = h.getCore();
|
||||||
|
|
|
@ -80,15 +80,16 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
IndexReader reader = core.getSearcher().get().getReader();
|
IndexReader reader = core.getSearcher().get().getReader();
|
||||||
Collection<Token> tokens = queryConverter.convert("fob");
|
Collection<Token> tokens = queryConverter.convert("fob");
|
||||||
SpellingResult result = checker.getSuggestions(tokens, reader);
|
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
|
||||||
|
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||||
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
||||||
assertTrue(entry.getKey() + " is not equal to " + "foo", entry.getKey().equals("foo") == true);
|
assertTrue(entry.getKey() + " is not equal to " + "foo", entry.getKey().equals("foo") == true);
|
||||||
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
||||||
|
|
||||||
tokens = queryConverter.convert("super");
|
spellOpts.tokens = queryConverter.convert("super");
|
||||||
result = checker.getSuggestions(tokens, reader);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
|
@ -118,7 +119,9 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
IndexReader reader = core.getSearcher().get().getReader();
|
IndexReader reader = core.getSearcher().get().getReader();
|
||||||
Collection<Token> tokens = queryConverter.convert("Solar");
|
Collection<Token> tokens = queryConverter.convert("Solar");
|
||||||
SpellingResult result = checker.getSuggestions(tokens, reader);
|
|
||||||
|
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
|
||||||
|
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||||
|
@ -128,8 +131,8 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
||||||
|
|
||||||
//test something not in the spell checker
|
//test something not in the spell checker
|
||||||
tokens = queryConverter.convert("super");
|
spellOpts.tokens = queryConverter.convert("super");
|
||||||
result = checker.getSuggestions(tokens, reader);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
|
@ -160,7 +163,8 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
IndexReader reader = core.getSearcher().get().getReader();
|
IndexReader reader = core.getSearcher().get().getReader();
|
||||||
Collection<Token> tokens = queryConverter.convert("solar");
|
Collection<Token> tokens = queryConverter.convert("solar");
|
||||||
SpellingResult result = checker.getSuggestions(tokens, reader);
|
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
|
||||||
|
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
||||||
|
@ -170,10 +174,10 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
||||||
|
|
||||||
|
|
||||||
tokens = queryConverter.convert("super");
|
spellOpts.tokens = queryConverter.convert("super");
|
||||||
result = checker.getSuggestions(tokens, reader);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -125,10 +125,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
IndexReader reader = searcher.getReader();
|
IndexReader reader = searcher.getReader();
|
||||||
Collection<Token> tokens = queryConverter.convert("documemt");
|
Collection<Token> tokens = queryConverter.convert("documemt");
|
||||||
SpellingResult result = checker.getSuggestions(tokens, reader);
|
SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
|
||||||
|
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("documemt is null and it shouldn't be", suggestions != null);
|
assertTrue("documemt is null and it shouldn't be", suggestions != null);
|
||||||
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
||||||
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
||||||
|
@ -136,32 +137,33 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
|
||||||
|
|
||||||
//test something not in the spell checker
|
//test something not in the spell checker
|
||||||
tokens = queryConverter.convert("super");
|
spellOpts.tokens = queryConverter.convert("super");
|
||||||
result = checker.getSuggestions(tokens, reader);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
|
|
||||||
//test something that is spelled correctly
|
//test something that is spelled correctly
|
||||||
tokens = queryConverter.convert("document");
|
spellOpts.tokens = queryConverter.convert("document");
|
||||||
result = checker.getSuggestions(tokens, reader);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is null and it shouldn't be", suggestions == null);
|
assertTrue("suggestions is null and it shouldn't be", suggestions == null);
|
||||||
|
|
||||||
//Has multiple possibilities, but the exact exists, so that should be returned
|
//Has multiple possibilities, but the exact exists, so that should be returned
|
||||||
tokens = queryConverter.convert("red");
|
spellOpts.tokens = queryConverter.convert("red");
|
||||||
result = checker.getSuggestions(tokens, reader, 2);
|
spellOpts.count = 2;
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
result = checker.getSuggestions(spellOpts);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
assertNotNull(result);
|
||||||
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
|
|
||||||
//Try out something which should have multiple suggestions
|
//Try out something which should have multiple suggestions
|
||||||
tokens = queryConverter.convert("bug");
|
spellOpts.tokens = queryConverter.convert("bug");
|
||||||
result = checker.getSuggestions(tokens, reader, 2);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertNotNull(result);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is null and it shouldn't be", suggestions != null);
|
assertNotNull(suggestions);
|
||||||
assertTrue("suggestions Size: " + suggestions.size() + " is not: " + 2, suggestions.size() == 2);
|
assertTrue("suggestions Size: " + suggestions.size() + " is not: " + 2, suggestions.size() == 2);
|
||||||
|
|
||||||
entry = suggestions.entrySet().iterator().next();
|
entry = suggestions.entrySet().iterator().next();
|
||||||
|
@ -198,10 +200,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
IndexReader reader = searcher.getReader();
|
IndexReader reader = searcher.getReader();
|
||||||
Collection<Token> tokens = queryConverter.convert("documemt");
|
Collection<Token> tokens = queryConverter.convert("documemt");
|
||||||
SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true);
|
SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null);
|
||||||
|
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("documemt is null and it shouldn't be", suggestions != null);
|
assertTrue("documemt is null and it shouldn't be", suggestions != null);
|
||||||
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
||||||
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
||||||
|
@ -209,16 +212,16 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2);
|
assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2);
|
||||||
|
|
||||||
//test something not in the spell checker
|
//test something not in the spell checker
|
||||||
tokens = queryConverter.convert("super");
|
spellOpts.tokens = queryConverter.convert("super");
|
||||||
result = checker.getSuggestions(tokens, reader, 1, false, true);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
|
|
||||||
tokens = queryConverter.convert("document");
|
spellOpts.tokens = queryConverter.convert("document");
|
||||||
result = checker.getSuggestions(tokens, reader, 1, false, true);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
} finally {
|
} finally {
|
||||||
holder.decref();
|
holder.decref();
|
||||||
|
@ -304,10 +307,11 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
IndexReader reader = searcher.getReader();
|
IndexReader reader = searcher.getReader();
|
||||||
Collection<Token> tokens = queryConverter.convert("flesh");
|
Collection<Token> tokens = queryConverter.convert("flesh");
|
||||||
SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true);
|
SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null);
|
||||||
|
SpellingResult result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
//should be lowercased, b/c we are using a lowercasing analyzer
|
//should be lowercased, b/c we are using a lowercasing analyzer
|
||||||
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
|
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("flesh is null and it shouldn't be", suggestions != null);
|
assertTrue("flesh is null and it shouldn't be", suggestions != null);
|
||||||
assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
|
||||||
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
|
||||||
|
@ -315,16 +319,16 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
|
assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
|
||||||
|
|
||||||
//test something not in the spell checker
|
//test something not in the spell checker
|
||||||
tokens = queryConverter.convert("super");
|
spellOpts.tokens = queryConverter.convert("super");
|
||||||
result = checker.getSuggestions(tokens, reader, 1, false, true);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
|
|
||||||
tokens = queryConverter.convert("Caroline");
|
spellOpts.tokens = queryConverter.convert("Caroline");
|
||||||
result = checker.getSuggestions(tokens, reader, 1, false, true);
|
result = checker.getSuggestions(spellOpts);
|
||||||
assertTrue("result is null and it shouldn't be", result != null);
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
suggestions = result.get(tokens.iterator().next());
|
suggestions = result.get(spellOpts.tokens.iterator().next());
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
} finally {
|
} finally {
|
||||||
holder.decref();
|
holder.decref();
|
||||||
|
|
|
@ -377,7 +377,11 @@
|
||||||
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
|
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
|
||||||
<str name="buildOnCommit">true</str>
|
<str name="buildOnCommit">true</str>
|
||||||
</lst>
|
</lst>
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">perDict</str>
|
||||||
|
<str name="classname">org.apache.solr.handler.component.DummyCustomParamSpellChecker</str>
|
||||||
|
<str name="field">lowerfilt</str>
|
||||||
|
</lst>
|
||||||
</searchComponent>
|
</searchComponent>
|
||||||
|
|
||||||
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
|
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
|
||||||
|
|
Loading…
Reference in New Issue