LUCENE-3436: add spellchecker SuggestMode

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1171556 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-09-16 13:41:29 +00:00
parent 4f553352d1
commit c0965ed3a2
7 changed files with 265 additions and 44 deletions

View File

@ -92,6 +92,11 @@ Bug Fixes
* LUCENE-3019: Fix unexpected color tags for FastVectorHighlighter. (Koji Sekiguchi) * LUCENE-3019: Fix unexpected color tags for FastVectorHighlighter. (Koji Sekiguchi)
API Changes
* LUCENE-3436: Add SuggestMode to the spellchecker, so you can specify the strategy
for suggesting related terms. (James Dyer via Robert Muir)
======================= Lucene 3.4.0 ================ ======================= Lucene 3.4.0 ================
New Features New Features

View File

@ -290,21 +290,22 @@ public class DirectSpellChecker {
} }
/** /**
* Calls {@link #suggestSimilar(Term, int, IndexReader, boolean) * Calls {@link #suggestSimilar(Term, int, IndexReader, SuggestMode)
* suggestSimilar(term, numSug, ir, false)} * suggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)}
*/ */
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir) public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir)
throws IOException { throws IOException {
return suggestSimilar(term, numSug, ir, false); return suggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
} }
/** /**
* Calls {@link #suggestSimilar(Term, int, IndexReader, boolean, float) * Calls {@link #suggestSimilar(Term, int, IndexReader, SuggestMode, float)
* suggestSimilar(term, numSug, ir, morePopular, this.accuracy)} * suggestSimilar(term, numSug, ir, suggestMode, this.accuracy)}
*
*/ */
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir, public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir,
boolean morePopular) throws IOException { SuggestMode suggestMode) throws IOException {
return suggestSimilar(term, numSug, ir, morePopular, accuracy); return suggestSimilar(term, numSug, ir, suggestMode, this.accuracy);
} }
/** /**
@ -323,7 +324,7 @@ public class DirectSpellChecker {
* @throws IOException * @throws IOException
*/ */
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir, public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir,
boolean morePopular, float accuracy) throws IOException { SuggestMode suggestMode, float accuracy) throws IOException {
final CharsRef spare = new CharsRef(); final CharsRef spare = new CharsRef();
String text = term.text(); String text = term.text();
if (minQueryLength > 0 && text.codePointCount(0, text.length()) < minQueryLength) if (minQueryLength > 0 && text.codePointCount(0, text.length()) < minQueryLength)
@ -335,9 +336,7 @@ public class DirectSpellChecker {
int docfreq = ir.docFreq(term); int docfreq = ir.docFreq(term);
// see line 341 of spellchecker. this is certainly very very nice for perf, if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && docfreq > 0) {
// but is it really the right way to go?
if (!morePopular && docfreq > 0) {
return new SuggestWord[0]; return new SuggestWord[0];
} }
@ -349,7 +348,7 @@ public class DirectSpellChecker {
return new SuggestWord[0]; return new SuggestWord[0];
} }
if (!morePopular) docfreq = 0; if (suggestMode!=SuggestMode.SUGGEST_MORE_POPULAR) docfreq = 0;
if (thresholdFrequency >= 1f) { if (thresholdFrequency >= 1f) {
docfreq = Math.max(docfreq, (int) thresholdFrequency); docfreq = Math.max(docfreq, (int) thresholdFrequency);

View File

@ -247,7 +247,7 @@ public class SpellChecker implements java.io.Closeable {
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
*/ */
public String[] suggestSimilar(String word, int numSug) throws IOException { public String[] suggestSimilar(String word, int numSug) throws IOException {
return this.suggestSimilar(word, numSug, null, null, false); return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
} }
/** /**
@ -271,7 +271,7 @@ public class SpellChecker implements java.io.Closeable {
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
*/ */
public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException { public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException {
return this.suggestSimilar(word, numSug, null, null, false, accuracy); return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy);
} }
/** /**
@ -300,8 +300,16 @@ public class SpellChecker implements java.io.Closeable {
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity * first criteria: the edit distance, second criteria (only if restricted mode): the popularity
* of the suggest words in the field of the user index * of the suggest words in the field of the user index
* *
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) * @see #suggestSimilar(String, int, IndexReader, String, SuggestMode, float)
*
* @deprecated
* use suggestSimilar(String, int, IndexReader, String, SuggestMode)
* <ul>
* <li>SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX instead of morePopular=false</li>
* <li>SuggestMode.SUGGEST_MORE_POPULAR instead of morePopular=true</li>
* </ul>
*/ */
@Deprecated
public String[] suggestSimilar(String word, int numSug, IndexReader ir, public String[] suggestSimilar(String word, int numSug, IndexReader ir,
String field, boolean morePopular) throws IOException { String field, boolean morePopular) throws IOException {
return suggestSimilar(word, numSug, ir, field, morePopular, accuracy); return suggestSimilar(word, numSug, ir, field, morePopular, accuracy);
@ -332,19 +340,78 @@ public class SpellChecker implements java.io.Closeable {
* @return String[] the sorted list of the suggest words with these 2 criteria: * @return String[] the sorted list of the suggest words with these 2 criteria:
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity * first criteria: the edit distance, second criteria (only if restricted mode): the popularity
* of the suggest words in the field of the user index * of the suggest words in the field of the user index
*
* @see #suggestSimilar(String, int, IndexReader, String, SuggestMode, float)
*
* @deprecated
* use suggestSimilar(String, int, IndexReader, String, SuggestMode, float)
* <ul>
* <li>SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX instead of morePopular=false</li>
* <li>SuggestMode.SUGGEST_MORE_POPULAR instead of morePopular=true</li>
* </ul>
*/ */
@Deprecated
public String[] suggestSimilar(String word, int numSug, IndexReader ir, public String[] suggestSimilar(String word, int numSug, IndexReader ir,
String field, boolean morePopular, float accuracy) throws IOException { String field, boolean morePopular, float accuracy) throws IOException {
return suggestSimilar(word, numSug, ir, field, morePopular ? SuggestMode.SUGGEST_MORE_POPULAR :
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy);
}
/**
 * Suggests similar words, using this spell checker's configured accuracy as
 * the minimum score.
 *
 * <p>Calls {@link #suggestSimilar(String, int, IndexReader, String, SuggestMode, float)
 * suggestSimilar(word, numSug, ir, field, suggestMode, this.accuracy)}.
 *
 * @param word the word you want a spell check done on
 * @param numSug the number of suggested words
 * @param ir the indexReader of the user index (may be null)
 * @param field the field of the user index (may be null)
 * @param suggestMode the strategy for suggesting related terms
 * @return the suggestions returned by the delegated call
 * @throws IOException if the delegated call throws an {@link IOException}
 */
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
String field, SuggestMode suggestMode) throws IOException {
// Delegate to the full overload, supplying the instance-level accuracy.
return suggestSimilar(word, numSug, ir, field, suggestMode, this.accuracy);
}
/**
* Suggest similar words (optionally restricted to a field of an index).
*
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
* is not the same as the edit distance strategy used to calculate the best
* matching spell-checked word from the hits that Lucene found, one usually has
* to retrieve a couple of numSug's in order to get the true best match.
*
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
*
* @param word the word you want a spell check done on
* @param numSug the number of suggested words
* @param ir the indexReader of the user index (can be null see field param)
* @param field the field of the user index: if field is not null, the suggested
* words are restricted to the words present in this field.
* @param suggestMode the strategy that decides when, and which, related terms are suggested
* (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS)
* @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
* @throws IOException if the underlying index throws an {@link IOException}
* @throws AlreadyClosedException if the Spellchecker is already closed
* @return String[] the sorted list of the suggest words with these 2 criteria:
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
* of the suggest words in the field of the user index
*
*/
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
String field, SuggestMode suggestMode, float accuracy) throws IOException {
// obtainSearcher calls ensureOpen // obtainSearcher calls ensureOpen
final IndexSearcher indexSearcher = obtainSearcher(); final IndexSearcher indexSearcher = obtainSearcher();
try { try {
if (ir == null || field == null) {
suggestMode = SuggestMode.SUGGEST_ALWAYS;
}
if (suggestMode == SuggestMode.SUGGEST_ALWAYS) {
ir = null;
field = null;
}
final int lengthWord = word.length(); final int lengthWord = word.length();
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0; final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0; final int goalFreq = suggestMode==SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
// if the word exists in the real index and we don't care for word frequency, return the word itself // if the word exists in the real index and we don't care for word frequency, return the word itself
if (!morePopular && freq > 0) { if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0) {
return new String[] { word }; return new String[] { word };
} }
@ -403,7 +470,7 @@ public class SpellChecker implements java.io.Closeable {
if (ir != null && field != null) { // use the user index if (ir != null && field != null) { // use the user index
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
// don't suggest a word that is not present in the field // don't suggest a word that is not present in the field
if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1) { if ((suggestMode==SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.freq) || sugWord.freq < 1) {
continue; continue;
} }
} }

View File

@ -0,0 +1,42 @@
package org.apache.lucene.search.spell;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Set of strategies for suggesting related terms.
 *
 * <p>A mode is passed to the spell checkers' {@code suggestSimilar} methods to
 * select when suggestions are generated and which candidates are kept.
 *
 * @lucene.experimental
 */
public enum SuggestMode {
/**
 * Generate suggestions only for terms not in the index (default).
 */
SUGGEST_WHEN_NOT_IN_INDEX,
/**
 * Return only suggested words that are as frequent or more frequent than the
 * searched word.
 */
SUGGEST_MORE_POPULAR,
/**
 * Always attempt to offer suggestions (however, other parameters may limit
 * suggestions. For example, see
 * {@link DirectSpellChecker#setMaxQueryFrequency}).
 */
SUGGEST_ALWAYS
}

View File

@ -45,29 +45,35 @@ public class TestDirectSpellChecker extends LuceneTestCase {
IndexReader ir = writer.getReader(); IndexReader ir = writer.getReader();
SuggestWord[] similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, false); SuggestWord[] similar = spellChecker.suggestSimilar(new Term("numbers",
"fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertTrue(similar.length > 0); assertTrue(similar.length > 0);
assertEquals("five", similar[0].string); assertEquals("five", similar[0].string);
similar = spellChecker.suggestSimilar(new Term("numbers", "five"), 2, ir, false); similar = spellChecker.suggestSimilar(new Term("numbers", "five"), 2, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
if (similar.length > 0) { if (similar.length > 0) {
assertFalse(similar[0].string.equals("five")); // don't suggest a word for itself assertFalse(similar[0].string.equals("five")); // don't suggest a word for itself
} }
similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, false); similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertTrue(similar.length > 0); assertTrue(similar.length > 0);
assertEquals("five", similar[0].string); assertEquals("five", similar[0].string);
similar = spellChecker.suggestSimilar(new Term("numbers", "fiv"), 2, ir, false); similar = spellChecker.suggestSimilar(new Term("numbers", "fiv"), 2, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertTrue(similar.length > 0); assertTrue(similar.length > 0);
assertEquals("five", similar[0].string); assertEquals("five", similar[0].string);
similar = spellChecker.suggestSimilar(new Term("numbers", "fives"), 2, ir, false); similar = spellChecker.suggestSimilar(new Term("numbers", "fives"), 2, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertTrue(similar.length > 0); assertTrue(similar.length > 0);
assertEquals("five", similar[0].string); assertEquals("five", similar[0].string);
assertTrue(similar.length > 0); assertTrue(similar.length > 0);
similar = spellChecker.suggestSimilar(new Term("numbers", "fie"), 2, ir, false); similar = spellChecker.suggestSimilar(new Term("numbers", "fie"), 2, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals("five", similar[0].string); assertEquals("five", similar[0].string);
// add some more documents // add some more documents
@ -81,7 +87,8 @@ public class TestDirectSpellChecker extends LuceneTestCase {
ir = writer.getReader(); ir = writer.getReader();
// look ma, no spellcheck index rebuild // look ma, no spellcheck index rebuild
similar = spellChecker.suggestSimilar(new Term("numbers", "tousand"), 10, ir, false); similar = spellChecker.suggestSimilar(new Term("numbers", "tousand"), 10,
ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertTrue(similar.length > 0); assertTrue(similar.length > 0);
assertEquals("thousand", similar[0].string); assertEquals("thousand", similar[0].string);
@ -109,34 +116,48 @@ public class TestDirectSpellChecker extends LuceneTestCase {
DirectSpellChecker spellChecker = new DirectSpellChecker(); DirectSpellChecker spellChecker = new DirectSpellChecker();
spellChecker.setMaxQueryFrequency(0F); spellChecker.setMaxQueryFrequency(0F);
SuggestWord[] similar = spellChecker.suggestSimilar(new Term("text", "fobar"), 1, ir, true); SuggestWord[] similar = spellChecker.suggestSimilar(new Term("text",
"fobar"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length); assertEquals(0, similar.length);
spellChecker = new DirectSpellChecker(); // reset defaults spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setMinQueryLength(5); spellChecker.setMinQueryLength(5);
similar = spellChecker.suggestSimilar(new Term("text", "foba"), 1, ir, true); similar = spellChecker.suggestSimilar(new Term("text", "foba"), 1, ir,
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length); assertEquals(0, similar.length);
spellChecker = new DirectSpellChecker(); // reset defaults spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setMaxEdits(1); spellChecker.setMaxEdits(1);
similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir, true); similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir,
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length); assertEquals(0, similar.length);
spellChecker = new DirectSpellChecker(); // reset defaults spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setAccuracy(0.9F); spellChecker.setAccuracy(0.9F);
similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir, true); similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir,
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length); assertEquals(0, similar.length);
spellChecker = new DirectSpellChecker(); // reset defaults spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setMinPrefix(0); spellChecker.setMinPrefix(0);
similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, true); similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir,
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(1, similar.length); assertEquals(1, similar.length);
similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir,
SuggestMode.SUGGEST_MORE_POPULAR);
spellChecker = new DirectSpellChecker(); // reset defaults spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setMinPrefix(1); spellChecker.setMinPrefix(1);
similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, true); similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir,
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length); assertEquals(0, similar.length);
spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setMaxEdits(2);
similar = spellChecker.suggestSimilar(new Term("text", "fobar"), 2, ir,
SuggestMode.SUGGEST_ALWAYS);
assertEquals(2, similar.length);
ir.close(); ir.close();
writer.close(); writer.close();
dir.close(); dir.close();
@ -156,7 +177,9 @@ public class TestDirectSpellChecker extends LuceneTestCase {
IndexReader ir = writer.getReader(); IndexReader ir = writer.getReader();
SuggestWord[] similar = spellChecker.suggestSimilar(new Term("bogusFieldBogusField", "fvie"), 2, ir, false); SuggestWord[] similar = spellChecker.suggestSimilar(new Term(
"bogusFieldBogusField", "fvie"), 2, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals(0, similar.length); assertEquals(0, similar.length);
ir.close(); ir.close();
writer.close(); writer.close();

View File

@ -63,6 +63,29 @@ public class TestSpellChecker extends LuceneTestCase {
doc.add(newField("field3", "fvei" + (i % 2 == 0 ? " five" : ""), TextField.TYPE_STORED)); // + word thousand doc.add(newField("field3", "fvei" + (i % 2 == 0 ? " five" : ""), TextField.TYPE_STORED)); // + word thousand
writer.addDocument(doc); writer.addDocument(doc);
} }
{
Document doc = new Document();
doc.add(newField("field1", "eight", TextField.TYPE_STORED)); // "eight" in
// the index
// twice
writer.addDocument(doc);
}
{
Document doc = new Document();
doc
.add(newField("field1", "twenty-one twenty-one",
TextField.TYPE_STORED)); // "twenty-one" in the index thrice
writer.addDocument(doc);
}
{
Document doc = new Document();
doc.add(newField("field1", "twenty", TextField.TYPE_STORED)); // "twenty"
// in the
// index
// twice
writer.addDocument(doc);
}
writer.close(); writer.close();
searchers = Collections.synchronizedList(new ArrayList<IndexSearcher>()); searchers = Collections.synchronizedList(new ArrayList<IndexSearcher>());
// create the spellChecker // create the spellChecker
@ -126,7 +149,8 @@ public class TestSpellChecker extends LuceneTestCase {
SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator()); SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator());
addwords(r, compareSP, "field3"); addwords(r, compareSP, "field3");
String[] similar = compareSP.suggestSimilar("fvie", 2, r, "field3", false); String[] similar = compareSP.suggestSimilar("fvie", 2, r, "field3",
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertTrue(similar.length == 2); assertTrue(similar.length == 2);
//five and fvei have the same score, but different frequencies. //five and fvei have the same score, but different frequencies.
assertEquals("fvei", similar[0]); assertEquals("fvei", similar[0]);
@ -143,7 +167,8 @@ public class TestSpellChecker extends LuceneTestCase {
SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator()); SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator());
addwords(r, compareSP, "field3"); addwords(r, compareSP, "field3");
String[] similar = compareSP.suggestSimilar("fvie", 2, r, "bogusFieldBogusField", false); String[] similar = compareSP.suggestSimilar("fvie", 2, r,
"bogusFieldBogusField", SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals(0, similar.length); assertEquals(0, similar.length);
r.close(); r.close();
if (!compareSP.isClosed()) if (!compareSP.isClosed())
@ -151,6 +176,60 @@ public class TestSpellChecker extends LuceneTestCase {
compIdx.close(); compIdx.close();
} }
/**
 * Exercises each {@link SuggestMode} against "field1" of the user index and
 * checks the number of suggestions and, where asserted, the top suggestion.
 */
public void testSuggestModes() throws Exception {
IndexReader r = IndexReader.open(userindex, true);
// Start from an empty spell index, then load it from "field1".
// NOTE(review): addwords is defined elsewhere; presumably it indexes the
// field's terms into the spell checker - confirm.
spellChecker.clearIndex();
addwords(r, spellChecker, "field1");

{
// SUGGEST_WHEN_NOT_IN_INDEX: the queried word is expected back as the
// only result (presumably "eighty" is already present in field1).
String[] similar = spellChecker.suggestSimilar("eighty", 2, r, "field1",
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals(1, similar.length);
assertEquals("eighty", similar[0]);
}
{
// Same mode, different word: again the word itself is the only result.
String[] similar = spellChecker.suggestSimilar("eight", 2, r, "field1",
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals(1, similar.length);
assertEquals("eight", similar[0]);
}
{
// SUGGEST_MORE_POPULAR: all five requested slots are filled, with
// "eight" ranked first.
String[] similar = spellChecker.suggestSimilar("eighty", 5, r, "field1",
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(5, similar.length);
assertEquals("eight", similar[0]);
}
{
// SUGGEST_MORE_POPULAR: only one candidate is expected to survive the
// frequency filter for "twenty".
String[] similar = spellChecker.suggestSimilar("twenty", 5, r, "field1",
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(1, similar.length);
assertEquals("twenty-one", similar[0]);
}
{
// SUGGEST_MORE_POPULAR: no candidate is expected to be at least as
// frequent as "eight".
String[] similar = spellChecker.suggestSimilar("eight", 5, r, "field1",
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length);
}
{
// SUGGEST_ALWAYS: suggestions are expected even though the same query
// under SUGGEST_WHEN_NOT_IN_INDEX returned only the word itself.
String[] similar = spellChecker.suggestSimilar("eighty", 5, r, "field1",
SuggestMode.SUGGEST_ALWAYS);
assertEquals(5, similar.length);
assertEquals("eight", similar[0]);
}
{
// SUGGEST_ALWAYS for "eight": five suggestions, "eighty" ranked first.
String[] similar = spellChecker.suggestSimilar("eight", 5, r, "field1",
SuggestMode.SUGGEST_ALWAYS);
assertEquals(5, similar.length);
assertEquals("eighty", similar[0]);
}
r.close();
}
private void checkCommonSuggestions(IndexReader r) throws IOException { private void checkCommonSuggestions(IndexReader r) throws IOException {
String[] similar = spellChecker.suggestSimilar("fvie", 2); String[] similar = spellChecker.suggestSimilar("fvie", 2);
assertTrue(similar.length > 0); assertTrue(similar.length > 0);
@ -174,10 +253,12 @@ public class TestSpellChecker extends LuceneTestCase {
assertEquals(similar[0], "five"); assertEquals(similar[0], "five");
// test restraint to a field // test restraint to a field
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false); similar = spellChecker.suggestSimilar("tousand", 10, r, "field1",
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals(0, similar.length); // there isn't the term thousand in the field field1 assertEquals(0, similar.length); // there isn't the term thousand in the field field1
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false); similar = spellChecker.suggestSimilar("tousand", 10, r, "field2",
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals(1, similar.length); // there is the term thousand in the field field2 assertEquals(1, similar.length); // there is the term thousand in the field field2
} }
@ -214,10 +295,12 @@ public class TestSpellChecker extends LuceneTestCase {
assertEquals(similar[0], "five"); assertEquals(similar[0], "five");
// test restraint to a field // test restraint to a field
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false); similar = spellChecker.suggestSimilar("tousand", 10, r, "field1",
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals(0, similar.length); // there isn't the term thousand in the field field1 assertEquals(0, similar.length); // there isn't the term thousand in the field field1
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false); similar = spellChecker.suggestSimilar("tousand", 10, r, "field2",
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
assertEquals(1, similar.length); // there is the term thousand in the field field2 assertEquals(1, similar.length); // there is the term thousand in the field field2
similar = spellChecker.suggestSimilar("onety", 2); similar = spellChecker.suggestSimilar("onety", 2);
@ -225,7 +308,8 @@ public class TestSpellChecker extends LuceneTestCase {
assertEquals(similar[0], "ninety"); assertEquals(similar[0], "ninety");
assertEquals(similar[1], "one"); assertEquals(similar[1], "one");
try { try {
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false); similar = spellChecker.suggestSimilar("tousand", 10, r, null,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
} catch (NullPointerException e) { } catch (NullPointerException e) {
assertTrue("threw an NPE, and it shouldn't have", false); assertTrue("threw an NPE, and it shouldn't have", false);
} }

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.StringDistance; import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
import org.apache.lucene.search.spell.SuggestWordQueue; import org.apache.lucene.search.spell.SuggestWordQueue;
@ -195,11 +196,11 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
SpellingResult result = new SpellingResult(); SpellingResult result = new SpellingResult();
float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
SuggestMode mode = options.onlyMorePopular ? SuggestMode.SUGGEST_MORE_POPULAR : SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
for (Token token : options.tokens) { for (Token token : options.tokens) {
Term term = new Term(field, token.toString()); Term term = new Term(field, token.toString());
SuggestWord[] suggestions = checker.suggestSimilar(term, SuggestWord[] suggestions = checker.suggestSimilar(term,
options.count, options.reader, options.onlyMorePopular, accuracy); options.count, options.reader, mode, accuracy);
result.addFrequency(token, options.reader.docFreq(term)); result.addFrequency(token, options.reader.docFreq(term));
for (SuggestWord suggestion : suggestions) { for (SuggestWord suggestion : suggestions) {
result.add(token, suggestion.string, suggestion.freq); result.add(token, suggestion.string, suggestion.freq);