mirror of https://github.com/apache/lucene.git
LUCENE-3436: add spellchecker SuggestMode
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1171556 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4f553352d1
commit
c0965ed3a2
|
@ -92,6 +92,11 @@ Bug Fixes
|
|||
|
||||
* LUCENE-3019: Fix unexpected color tags for FastVectorHighlighter. (Koji Sekiguchi)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-3436: Add SuggestMode to the spellchecker, so you can specify the strategy
|
||||
for suggesting related terms. (James Dyer via Robert Muir)
|
||||
|
||||
======================= Lucene 3.4.0 ================
|
||||
|
||||
New Features
|
||||
|
|
|
@ -290,21 +290,22 @@ public class DirectSpellChecker {
|
|||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #suggestSimilar(Term, int, IndexReader, boolean)
|
||||
* suggestSimilar(term, numSug, ir, false)}
|
||||
* Calls {@link #suggestSimilar(Term, int, IndexReader, SuggestMode)
|
||||
* suggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)}
|
||||
*/
|
||||
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir)
|
||||
throws IOException {
|
||||
return suggestSimilar(term, numSug, ir, false);
|
||||
return suggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #suggestSimilar(Term, int, IndexReader, boolean, float)
|
||||
* suggestSimilar(term, numSug, ir, morePopular, this.accuracy)}
|
||||
* Calls {@link #suggestSimilar(Term, int, IndexReader, SuggestMode, float)
|
||||
* suggestSimilar(term, numSug, ir, suggestMode, this.accuracy)}
|
||||
*
|
||||
*/
|
||||
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir,
|
||||
boolean morePopular) throws IOException {
|
||||
return suggestSimilar(term, numSug, ir, morePopular, accuracy);
|
||||
SuggestMode suggestMode) throws IOException {
|
||||
return suggestSimilar(term, numSug, ir, suggestMode, this.accuracy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -323,7 +324,7 @@ public class DirectSpellChecker {
|
|||
* @throws IOException
|
||||
*/
|
||||
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir,
|
||||
boolean morePopular, float accuracy) throws IOException {
|
||||
SuggestMode suggestMode, float accuracy) throws IOException {
|
||||
final CharsRef spare = new CharsRef();
|
||||
String text = term.text();
|
||||
if (minQueryLength > 0 && text.codePointCount(0, text.length()) < minQueryLength)
|
||||
|
@ -335,9 +336,7 @@ public class DirectSpellChecker {
|
|||
|
||||
int docfreq = ir.docFreq(term);
|
||||
|
||||
// see line 341 of spellchecker. this is certainly very very nice for perf,
|
||||
// but is it really the right way to go?
|
||||
if (!morePopular && docfreq > 0) {
|
||||
if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && docfreq > 0) {
|
||||
return new SuggestWord[0];
|
||||
}
|
||||
|
||||
|
@ -349,7 +348,7 @@ public class DirectSpellChecker {
|
|||
return new SuggestWord[0];
|
||||
}
|
||||
|
||||
if (!morePopular) docfreq = 0;
|
||||
if (suggestMode!=SuggestMode.SUGGEST_MORE_POPULAR) docfreq = 0;
|
||||
|
||||
if (thresholdFrequency >= 1f) {
|
||||
docfreq = Math.max(docfreq, (int) thresholdFrequency);
|
||||
|
|
|
@ -247,7 +247,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug) throws IOException {
|
||||
return this.suggestSimilar(word, numSug, null, null, false);
|
||||
return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -271,7 +271,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException {
|
||||
return this.suggestSimilar(word, numSug, null, null, false, accuracy);
|
||||
return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -300,8 +300,16 @@ public class SpellChecker implements java.io.Closeable {
|
|||
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
|
||||
* of the suggest words in the field of the user index
|
||||
*
|
||||
* @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
|
||||
* @see #suggestSimilar(String, int, IndexReader, String, SuggestMode, float)
|
||||
*
|
||||
* @deprecated
|
||||
* use suggestSimilar(String, int, IndexReader, String, SuggestMode)
|
||||
* <ul>
|
||||
* <li>SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX instead of morePopular=false</li>
|
||||
* <li>SuggestMode.SuGGEST_MORE_POPULAR instead of morePopular=true</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Deprecated
|
||||
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||
String field, boolean morePopular) throws IOException {
|
||||
return suggestSimilar(word, numSug, ir, field, morePopular, accuracy);
|
||||
|
@ -332,19 +340,78 @@ public class SpellChecker implements java.io.Closeable {
|
|||
* @return String[] the sorted list of the suggest words with these 2 criteria:
|
||||
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
|
||||
* of the suggest words in the field of the user index
|
||||
*
|
||||
* @see #suggestSimilar(String, int, IndexReader, String, SuggestMode, float)
|
||||
*
|
||||
* @deprecated
|
||||
* use suggestSimilar(String, int, IndexReader, String, SuggestMode, float)
|
||||
* <ul>
|
||||
* <li>SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX instead of morePopular=false</li>
|
||||
* <li>SuggestMode.SuGGEST_MORE_POPULAR instead of morePopular=true</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Deprecated
|
||||
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||
String field, boolean morePopular, float accuracy) throws IOException {
|
||||
return suggestSimilar(word, numSug, ir, field, morePopular ? SuggestMode.SUGGEST_MORE_POPULAR :
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #suggestSimilar(String, int, IndexReader, String, SuggestMode, float)
|
||||
* suggestSimilar(word, numSug, ir, suggestMode, field, this.accuracy)}
|
||||
*
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||
String field, SuggestMode suggestMode) throws IOException {
|
||||
return suggestSimilar(word, numSug, ir, field, suggestMode, this.accuracy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest similar words (optionally restricted to a field of an index).
|
||||
*
|
||||
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
|
||||
* is not the same as the edit distance strategy used to calculate the best
|
||||
* matching spell-checked word from the hits that Lucene found, one usually has
|
||||
* to retrieve a couple of numSug's in order to get the true best match.
|
||||
*
|
||||
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
|
||||
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
|
||||
*
|
||||
* @param word the word you want a spell check done on
|
||||
* @param numSug the number of suggested words
|
||||
* @param ir the indexReader of the user index (can be null see field param)
|
||||
* @param field the field of the user index: if field is not null, the suggested
|
||||
* words are restricted to the words present in this field.
|
||||
* @param suggestMode
|
||||
* (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS)
|
||||
* @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
|
||||
* @throws IOException if the underlying index throws an {@link IOException}
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @return String[] the sorted list of the suggest words with these 2 criteria:
|
||||
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
|
||||
* of the suggest words in the field of the user index
|
||||
*
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||
String field, SuggestMode suggestMode, float accuracy) throws IOException {
|
||||
// obtainSearcher calls ensureOpen
|
||||
final IndexSearcher indexSearcher = obtainSearcher();
|
||||
try{
|
||||
try {
|
||||
if (ir == null || field == null) {
|
||||
suggestMode = SuggestMode.SUGGEST_ALWAYS;
|
||||
}
|
||||
if (suggestMode == SuggestMode.SUGGEST_ALWAYS) {
|
||||
ir = null;
|
||||
field = null;
|
||||
}
|
||||
|
||||
final int lengthWord = word.length();
|
||||
|
||||
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
|
||||
final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
|
||||
final int goalFreq = suggestMode==SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
|
||||
// if the word exists in the real index and we don't care for word frequency, return the word itself
|
||||
if (!morePopular && freq > 0) {
|
||||
if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0) {
|
||||
return new String[] { word };
|
||||
}
|
||||
|
||||
|
@ -403,7 +470,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
if (ir != null && field != null) { // use the user index
|
||||
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
|
||||
// don't suggest a word that is not present in the field
|
||||
if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1) {
|
||||
if ((suggestMode==SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.freq) || sugWord.freq < 1) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
package org.apache.lucene.search.spell;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Set of strategies for suggesting related terms
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public enum SuggestMode {
|
||||
/**
|
||||
* Generate suggestions only for terms not in the index (default)
|
||||
*/
|
||||
SUGGEST_WHEN_NOT_IN_INDEX,
|
||||
|
||||
/**
|
||||
* Return only suggested words that are as frequent or more frequent than the
|
||||
* searched word
|
||||
*/
|
||||
SUGGEST_MORE_POPULAR,
|
||||
|
||||
/**
|
||||
* Always attempt to offer suggestions (however, other parameters may limit
|
||||
* suggestions. For example, see
|
||||
* {@link DirectSpellChecker.setMaxQueryFrequency} ).
|
||||
*/
|
||||
SUGGEST_ALWAYS
|
||||
}
|
|
@ -45,29 +45,35 @@ public class TestDirectSpellChecker extends LuceneTestCase {
|
|||
|
||||
IndexReader ir = writer.getReader();
|
||||
|
||||
SuggestWord[] similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, false);
|
||||
SuggestWord[] similar = spellChecker.suggestSimilar(new Term("numbers",
|
||||
"fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertTrue(similar.length > 0);
|
||||
assertEquals("five", similar[0].string);
|
||||
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "five"), 2, ir, false);
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "five"), 2, ir,
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
if (similar.length > 0) {
|
||||
assertFalse(similar[0].string.equals("five")); // don't suggest a word for itself
|
||||
}
|
||||
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, false);
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir,
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertTrue(similar.length > 0);
|
||||
assertEquals("five", similar[0].string);
|
||||
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "fiv"), 2, ir, false);
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "fiv"), 2, ir,
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertTrue(similar.length > 0);
|
||||
assertEquals("five", similar[0].string);
|
||||
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "fives"), 2, ir, false);
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "fives"), 2, ir,
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertTrue(similar.length > 0);
|
||||
assertEquals("five", similar[0].string);
|
||||
|
||||
assertTrue(similar.length > 0);
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "fie"), 2, ir, false);
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "fie"), 2, ir,
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals("five", similar[0].string);
|
||||
|
||||
// add some more documents
|
||||
|
@ -81,7 +87,8 @@ public class TestDirectSpellChecker extends LuceneTestCase {
|
|||
ir = writer.getReader();
|
||||
|
||||
// look ma, no spellcheck index rebuild
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "tousand"), 10, ir, false);
|
||||
similar = spellChecker.suggestSimilar(new Term("numbers", "tousand"), 10,
|
||||
ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertTrue(similar.length > 0);
|
||||
assertEquals("thousand", similar[0].string);
|
||||
|
||||
|
@ -109,34 +116,48 @@ public class TestDirectSpellChecker extends LuceneTestCase {
|
|||
|
||||
DirectSpellChecker spellChecker = new DirectSpellChecker();
|
||||
spellChecker.setMaxQueryFrequency(0F);
|
||||
SuggestWord[] similar = spellChecker.suggestSimilar(new Term("text", "fobar"), 1, ir, true);
|
||||
SuggestWord[] similar = spellChecker.suggestSimilar(new Term("text",
|
||||
"fobar"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(0, similar.length);
|
||||
|
||||
spellChecker = new DirectSpellChecker(); // reset defaults
|
||||
spellChecker.setMinQueryLength(5);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "foba"), 1, ir, true);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "foba"), 1, ir,
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(0, similar.length);
|
||||
|
||||
spellChecker = new DirectSpellChecker(); // reset defaults
|
||||
spellChecker.setMaxEdits(1);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir, true);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir,
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(0, similar.length);
|
||||
|
||||
spellChecker = new DirectSpellChecker(); // reset defaults
|
||||
spellChecker.setAccuracy(0.9F);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir, true);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir,
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(0, similar.length);
|
||||
|
||||
spellChecker = new DirectSpellChecker(); // reset defaults
|
||||
spellChecker.setMinPrefix(0);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, true);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir,
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(1, similar.length);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir,
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
|
||||
spellChecker = new DirectSpellChecker(); // reset defaults
|
||||
spellChecker.setMinPrefix(1);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, true);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir,
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(0, similar.length);
|
||||
|
||||
spellChecker = new DirectSpellChecker(); // reset defaults
|
||||
spellChecker.setMaxEdits(2);
|
||||
similar = spellChecker.suggestSimilar(new Term("text", "fobar"), 2, ir,
|
||||
SuggestMode.SUGGEST_ALWAYS);
|
||||
assertEquals(2, similar.length);
|
||||
|
||||
ir.close();
|
||||
writer.close();
|
||||
dir.close();
|
||||
|
@ -156,7 +177,9 @@ public class TestDirectSpellChecker extends LuceneTestCase {
|
|||
|
||||
IndexReader ir = writer.getReader();
|
||||
|
||||
SuggestWord[] similar = spellChecker.suggestSimilar(new Term("bogusFieldBogusField", "fvie"), 2, ir, false);
|
||||
SuggestWord[] similar = spellChecker.suggestSimilar(new Term(
|
||||
"bogusFieldBogusField", "fvie"), 2, ir,
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals(0, similar.length);
|
||||
ir.close();
|
||||
writer.close();
|
||||
|
|
|
@ -63,6 +63,29 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
doc.add(newField("field3", "fvei" + (i % 2 == 0 ? " five" : ""), TextField.TYPE_STORED)); // + word thousand
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
{
|
||||
Document doc = new Document();
|
||||
doc.add(newField("field1", "eight", TextField.TYPE_STORED)); // "eight" in
|
||||
// the index
|
||||
// twice
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
{
|
||||
Document doc = new Document();
|
||||
doc
|
||||
.add(newField("field1", "twenty-one twenty-one",
|
||||
TextField.TYPE_STORED)); // "twenty-one" in the index thrice
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
{
|
||||
Document doc = new Document();
|
||||
doc.add(newField("field1", "twenty", TextField.TYPE_STORED)); // "twenty"
|
||||
// in the
|
||||
// index
|
||||
// twice
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
searchers = Collections.synchronizedList(new ArrayList<IndexSearcher>());
|
||||
// create the spellChecker
|
||||
|
@ -126,7 +149,8 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator());
|
||||
addwords(r, compareSP, "field3");
|
||||
|
||||
String[] similar = compareSP.suggestSimilar("fvie", 2, r, "field3", false);
|
||||
String[] similar = compareSP.suggestSimilar("fvie", 2, r, "field3",
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertTrue(similar.length == 2);
|
||||
//five and fvei have the same score, but different frequencies.
|
||||
assertEquals("fvei", similar[0]);
|
||||
|
@ -143,7 +167,8 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator());
|
||||
addwords(r, compareSP, "field3");
|
||||
|
||||
String[] similar = compareSP.suggestSimilar("fvie", 2, r, "bogusFieldBogusField", false);
|
||||
String[] similar = compareSP.suggestSimilar("fvie", 2, r,
|
||||
"bogusFieldBogusField", SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals(0, similar.length);
|
||||
r.close();
|
||||
if (!compareSP.isClosed())
|
||||
|
@ -151,6 +176,60 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
compIdx.close();
|
||||
}
|
||||
|
||||
public void testSuggestModes() throws Exception {
|
||||
IndexReader r = IndexReader.open(userindex, true);
|
||||
spellChecker.clearIndex();
|
||||
addwords(r, spellChecker, "field1");
|
||||
|
||||
{
|
||||
String[] similar = spellChecker.suggestSimilar("eighty", 2, r, "field1",
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals(1, similar.length);
|
||||
assertEquals("eighty", similar[0]);
|
||||
}
|
||||
|
||||
{
|
||||
String[] similar = spellChecker.suggestSimilar("eight", 2, r, "field1",
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals(1, similar.length);
|
||||
assertEquals("eight", similar[0]);
|
||||
}
|
||||
|
||||
{
|
||||
String[] similar = spellChecker.suggestSimilar("eighty", 5, r, "field1",
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(5, similar.length);
|
||||
assertEquals("eight", similar[0]);
|
||||
}
|
||||
|
||||
{
|
||||
String[] similar = spellChecker.suggestSimilar("twenty", 5, r, "field1",
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(1, similar.length);
|
||||
assertEquals("twenty-one", similar[0]);
|
||||
}
|
||||
|
||||
{
|
||||
String[] similar = spellChecker.suggestSimilar("eight", 5, r, "field1",
|
||||
SuggestMode.SUGGEST_MORE_POPULAR);
|
||||
assertEquals(0, similar.length);
|
||||
}
|
||||
|
||||
{
|
||||
String[] similar = spellChecker.suggestSimilar("eighty", 5, r, "field1",
|
||||
SuggestMode.SUGGEST_ALWAYS);
|
||||
assertEquals(5, similar.length);
|
||||
assertEquals("eight", similar[0]);
|
||||
}
|
||||
|
||||
{
|
||||
String[] similar = spellChecker.suggestSimilar("eight", 5, r, "field1",
|
||||
SuggestMode.SUGGEST_ALWAYS);
|
||||
assertEquals(5, similar.length);
|
||||
assertEquals("eighty", similar[0]);
|
||||
}
|
||||
r.close();
|
||||
}
|
||||
private void checkCommonSuggestions(IndexReader r) throws IOException {
|
||||
String[] similar = spellChecker.suggestSimilar("fvie", 2);
|
||||
assertTrue(similar.length > 0);
|
||||
|
@ -174,10 +253,12 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
assertEquals(similar[0], "five");
|
||||
|
||||
// test restraint to a field
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1",
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals(0, similar.length); // there isn't the term thousand in the field field1
|
||||
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2",
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals(1, similar.length); // there is the term thousand in the field field2
|
||||
}
|
||||
|
||||
|
@ -214,10 +295,12 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
assertEquals(similar[0], "five");
|
||||
|
||||
// test restraint to a field
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1",
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals(0, similar.length); // there isn't the term thousand in the field field1
|
||||
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2",
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
assertEquals(1, similar.length); // there is the term thousand in the field field2
|
||||
|
||||
similar = spellChecker.suggestSimilar("onety", 2);
|
||||
|
@ -225,7 +308,8 @@ public class TestSpellChecker extends LuceneTestCase {
|
|||
assertEquals(similar[0], "ninety");
|
||||
assertEquals(similar[1], "one");
|
||||
try {
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, null,
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||
} catch (NullPointerException e) {
|
||||
assertTrue("threw an NPE, and it shouldn't have", false);
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.lucene.search.spell.SuggestMode;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||
|
@ -195,11 +196,11 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
|
|||
|
||||
SpellingResult result = new SpellingResult();
|
||||
float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
|
||||
|
||||
SuggestMode mode = options.onlyMorePopular ? SuggestMode.SUGGEST_MORE_POPULAR : SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
|
||||
for (Token token : options.tokens) {
|
||||
Term term = new Term(field, token.toString());
|
||||
SuggestWord[] suggestions = checker.suggestSimilar(term,
|
||||
options.count, options.reader, options.onlyMorePopular, accuracy);
|
||||
options.count, options.reader, mode, accuracy);
|
||||
result.addFrequency(token, options.reader.docFreq(term));
|
||||
for (SuggestWord suggestion : suggestions) {
|
||||
result.add(token, suggestion.string, suggestion.freq);
|
||||
|
|
Loading…
Reference in New Issue