From c0965ed3a2c9e9c237065a8ff5cdf9901af5e865 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 16 Sep 2011 13:41:29 +0000 Subject: [PATCH] LUCENE-3436: add spellchecker SuggestMode git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1171556 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/CHANGES.txt | 5 + .../search/spell/DirectSpellChecker.java | 23 ++-- .../lucene/search/spell/SpellChecker.java | 81 ++++++++++++-- .../lucene/search/spell/SuggestMode.java | 42 ++++++++ .../search/spell/TestDirectSpellChecker.java | 53 +++++++--- .../lucene/search/spell/TestSpellChecker.java | 100 ++++++++++++++++-- .../solr/spelling/DirectSolrSpellChecker.java | 5 +- 7 files changed, 265 insertions(+), 44 deletions(-) create mode 100644 modules/suggest/src/java/org/apache/lucene/search/spell/SuggestMode.java diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index f9474ffeb61..2a670b5c5cd 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -92,6 +92,11 @@ Bug Fixes * LUCENE-3019: Fix unexpected color tags for FastVectorHighlighter. (Koji Sekiguchi) +API Changes + + * LUCENE-3436: Add SuggestMode to the spellchecker, so you can specify the strategy + for suggesting related terms. (James Dyer via Robert Muir) + ======================= Lucene 3.4.0 ================ New Features diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java b/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java index 66b76d7d18c..88e5ca1358c 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java @@ -290,21 +290,22 @@ public class DirectSpellChecker { } /** - * Calls {@link #suggestSimilar(Term, int, IndexReader, boolean) - * suggestSimilar(term, numSug, ir, false)} + * Calls {@link #suggestSimilar(Term, int, IndexReader, SuggestMode) + * suggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)} */ public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir) throws IOException { - return suggestSimilar(term, numSug, ir, false); + return suggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); } /** - * Calls {@link #suggestSimilar(Term, int, IndexReader, boolean, float) - * suggestSimilar(term, numSug, ir, morePopular, this.accuracy)} + * Calls {@link #suggestSimilar(Term, int, IndexReader, SuggestMode, float) + * suggestSimilar(term, numSug, ir, suggestMode, this.accuracy)} + * */ public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir, - boolean morePopular) throws IOException { - return suggestSimilar(term, numSug, ir, morePopular, accuracy); + SuggestMode suggestMode) throws IOException { + return suggestSimilar(term, numSug, ir, suggestMode, this.accuracy); } /** @@ -323,7 +324,7 @@ public class DirectSpellChecker { * @throws IOException */ public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir, - boolean morePopular, float accuracy) throws IOException { + SuggestMode suggestMode, float accuracy) throws IOException { final CharsRef spare = new CharsRef(); String text = term.text(); if (minQueryLength > 0 && text.codePointCount(0, text.length()) < minQueryLength) @@ -335,9 +336,7 @@ public class DirectSpellChecker { int docfreq = ir.docFreq(term); - // see line 341 of spellchecker. this is certainly very very nice for perf, - // but is it really the right way to go? - if (!morePopular && docfreq > 0) { + if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && docfreq > 0) { return new SuggestWord[0]; } @@ -349,7 +348,7 @@ public class DirectSpellChecker { return new SuggestWord[0]; } - if (!morePopular) docfreq = 0; + if (suggestMode!=SuggestMode.SUGGEST_MORE_POPULAR) docfreq = 0; if (thresholdFrequency >= 1f) { docfreq = Math.max(docfreq, (int) thresholdFrequency); diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java index 2d04163c578..450d11bacac 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java @@ -247,7 +247,7 @@ public class SpellChecker implements java.io.Closeable { * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) */ public String[] suggestSimilar(String word, int numSug) throws IOException { - return this.suggestSimilar(word, numSug, null, null, false); + return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); } /** @@ -271,7 +271,7 @@ public class SpellChecker implements java.io.Closeable { * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) */ public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException { - return this.suggestSimilar(word, numSug, null, null, false, accuracy); + return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy); } /** @@ -300,8 +300,16 @@ public class SpellChecker implements java.io.Closeable { * first criteria: the edit distance, second criteria (only if restricted mode): the popularity * of the suggest words in the field of the user index * - * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) + * @see #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) + * + * @deprecated + * use suggestSimilar(String, int, IndexReader, String, SuggestMode) + * */ + @Deprecated public String[] suggestSimilar(String word, int numSug, IndexReader ir, String field, boolean morePopular) throws IOException { return suggestSimilar(word, numSug, ir, field, morePopular, accuracy); @@ -332,19 +340,78 @@ public class SpellChecker implements java.io.Closeable { * @return String[] the sorted list of the suggest words with these 2 criteria: * first criteria: the edit distance, second criteria (only if restricted mode): the popularity * of the suggest words in the field of the user index + * + * @see #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) + * + * @deprecated + * use suggestSimilar(String, int, IndexReader, String, SuggestMode, float) + * */ + @Deprecated public String[] suggestSimilar(String word, int numSug, IndexReader ir, String field, boolean morePopular, float accuracy) throws IOException { + return suggestSimilar(word, numSug, ir, field, morePopular ? SuggestMode.SUGGEST_MORE_POPULAR : + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy); + } + + /** + * Calls {@link #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) + * suggestSimilar(word, numSug, ir, suggestMode, field, this.accuracy)} + * + */ + public String[] suggestSimilar(String word, int numSug, IndexReader ir, + String field, SuggestMode suggestMode) throws IOException { + return suggestSimilar(word, numSug, ir, field, suggestMode, this.accuracy); + } + + /** + * Suggest similar words (optionally restricted to a field of an index). + * + *

As the Lucene similarity that is used to fetch the most relevant n-grammed terms + * is not the same as the edit distance strategy used to calculate the best + * matching spell-checked word from the hits that Lucene found, one usually has + * to retrieve a couple of numSug's in order to get the true best match. + * + *

I.e. if numSug == 1, don't count on that suggestion being the best one. + * Thus, you should set this value to at least 5 for a good suggestion. + * + * @param word the word you want a spell check done on + * @param numSug the number of suggested words + * @param ir the indexReader of the user index (can be null see field param) + * @param field the field of the user index: if field is not null, the suggested + * words are restricted to the words present in this field. + * @param suggestMode + * (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) + * @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results + * @throws IOException if the underlying index throws an {@link IOException} + * @throws AlreadyClosedException if the Spellchecker is already closed + * @return String[] the sorted list of the suggest words with these 2 criteria: + * first criteria: the edit distance, second criteria (only if restricted mode): the popularity + * of the suggest words in the field of the user index + * + */ + public String[] suggestSimilar(String word, int numSug, IndexReader ir, + String field, SuggestMode suggestMode, float accuracy) throws IOException { // obtainSearcher calls ensureOpen final IndexSearcher indexSearcher = obtainSearcher(); - try{ + try { + if (ir == null || field == null) { + suggestMode = SuggestMode.SUGGEST_ALWAYS; + } + if (suggestMode == SuggestMode.SUGGEST_ALWAYS) { + ir = null; + field = null; + } final int lengthWord = word.length(); final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0; - final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0; + final int goalFreq = suggestMode==SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0; // if the word exists in the real index and we don't care for word frequency, return the word itself - if (!morePopular && freq > 0) { + if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0) { return new String[] { word }; } @@ -403,7 +470,7 @@ public class SpellChecker implements java.io.Closeable { if (ir != null && field != null) { // use the user index sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index // don't suggest a word that is not present in the field - if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1) { + if ((suggestMode==SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.freq) || sugWord.freq < 1) { continue; } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/SuggestMode.java b/modules/suggest/src/java/org/apache/lucene/search/spell/SuggestMode.java new file mode 100644 index 00000000000..72b269c0b14 --- /dev/null +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/SuggestMode.java @@ -0,0 +1,42 @@ +package org.apache.lucene.search.spell; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Set of strategies for suggesting related terms + * @lucene.experimental + */ +public enum SuggestMode { + /** + * Generate suggestions only for terms not in the index (default) + */ + SUGGEST_WHEN_NOT_IN_INDEX, + + /** + * Return only suggested words that are as frequent or more frequent than the + * searched word + */ + SUGGEST_MORE_POPULAR, + + /** + * Always attempt to offer suggestions (however, other parameters may limit + * suggestions. For example, see + * {@link DirectSpellChecker.setMaxQueryFrequency} ). + */ + SUGGEST_ALWAYS +} diff --git a/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java b/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java index 8f8d7ca9e2c..38461acb8c6 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java +++ b/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java @@ -45,29 +45,35 @@ public class TestDirectSpellChecker extends LuceneTestCase { IndexReader ir = writer.getReader(); - SuggestWord[] similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, false); + SuggestWord[] similar = spellChecker.suggestSimilar(new Term("numbers", + "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertTrue(similar.length > 0); assertEquals("five", similar[0].string); - similar = spellChecker.suggestSimilar(new Term("numbers", "five"), 2, ir, false); + similar = spellChecker.suggestSimilar(new Term("numbers", "five"), 2, ir, + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); if (similar.length > 0) { assertFalse(similar[0].string.equals("five")); // don't suggest a word for itself } - similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, false); + similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertTrue(similar.length > 0); assertEquals("five", similar[0].string); - similar = spellChecker.suggestSimilar(new Term("numbers", "fiv"), 2, ir, false); + similar = spellChecker.suggestSimilar(new Term("numbers", "fiv"), 2, ir, + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertTrue(similar.length > 0); assertEquals("five", similar[0].string); - similar = spellChecker.suggestSimilar(new Term("numbers", "fives"), 2, ir, false); + similar = spellChecker.suggestSimilar(new Term("numbers", "fives"), 2, ir, + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertTrue(similar.length > 0); assertEquals("five", similar[0].string); assertTrue(similar.length > 0); - similar = spellChecker.suggestSimilar(new Term("numbers", "fie"), 2, ir, false); + similar = spellChecker.suggestSimilar(new Term("numbers", "fie"), 2, ir, + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertEquals("five", similar[0].string); // add some more documents @@ -81,7 +87,8 @@ public class TestDirectSpellChecker extends LuceneTestCase { ir = writer.getReader(); // look ma, no spellcheck index rebuild - similar = spellChecker.suggestSimilar(new Term("numbers", "tousand"), 10, ir, false); + similar = spellChecker.suggestSimilar(new Term("numbers", "tousand"), 10, + ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertTrue(similar.length > 0); assertEquals("thousand", similar[0].string); @@ -109,34 +116,48 @@ public class TestDirectSpellChecker extends LuceneTestCase { DirectSpellChecker spellChecker = new DirectSpellChecker(); spellChecker.setMaxQueryFrequency(0F); - SuggestWord[] similar = spellChecker.suggestSimilar(new Term("text", "fobar"), 1, ir, true); + SuggestWord[] similar = spellChecker.suggestSimilar(new Term("text", + "fobar"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR); assertEquals(0, similar.length); spellChecker = new DirectSpellChecker(); // reset defaults spellChecker.setMinQueryLength(5); - similar = spellChecker.suggestSimilar(new Term("text", "foba"), 1, ir, true); + similar = spellChecker.suggestSimilar(new Term("text", "foba"), 1, ir, + SuggestMode.SUGGEST_MORE_POPULAR); assertEquals(0, similar.length); spellChecker = new DirectSpellChecker(); // reset defaults spellChecker.setMaxEdits(1); - similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir, true); + similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir, + SuggestMode.SUGGEST_MORE_POPULAR); assertEquals(0, similar.length); spellChecker = new DirectSpellChecker(); // reset defaults spellChecker.setAccuracy(0.9F); - similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir, true); + similar = spellChecker.suggestSimilar(new Term("text", "foobazzz"), 1, ir, + SuggestMode.SUGGEST_MORE_POPULAR); assertEquals(0, similar.length); spellChecker = new DirectSpellChecker(); // reset defaults spellChecker.setMinPrefix(0); - similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, true); + similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, + SuggestMode.SUGGEST_MORE_POPULAR); assertEquals(1, similar.length); - + similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, + SuggestMode.SUGGEST_MORE_POPULAR); + spellChecker = new DirectSpellChecker(); // reset defaults spellChecker.setMinPrefix(1); - similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, true); + similar = spellChecker.suggestSimilar(new Term("text", "roobaz"), 1, ir, + SuggestMode.SUGGEST_MORE_POPULAR); assertEquals(0, similar.length); + spellChecker = new DirectSpellChecker(); // reset defaults + spellChecker.setMaxEdits(2); + similar = spellChecker.suggestSimilar(new Term("text", "fobar"), 2, ir, + SuggestMode.SUGGEST_ALWAYS); + assertEquals(2, similar.length); + ir.close(); writer.close(); dir.close(); @@ -156,7 +177,9 @@ public class TestDirectSpellChecker extends LuceneTestCase { IndexReader ir = writer.getReader(); - SuggestWord[] similar = spellChecker.suggestSimilar(new Term("bogusFieldBogusField", "fvie"), 2, ir, false); + SuggestWord[] similar = spellChecker.suggestSimilar(new Term( + "bogusFieldBogusField", "fvie"), 2, ir, + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertEquals(0, similar.length); ir.close(); writer.close(); diff --git a/modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java b/modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java index 0b685442739..f74468a3b5e 100755 --- a/modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java +++ b/modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java @@ -63,6 +63,29 @@ public class TestSpellChecker extends LuceneTestCase { doc.add(newField("field3", "fvei" + (i % 2 == 0 ? " five" : ""), TextField.TYPE_STORED)); // + word thousand writer.addDocument(doc); } + { + Document doc = new Document(); + doc.add(newField("field1", "eight", TextField.TYPE_STORED)); // "eight" in + // the index + // twice + writer.addDocument(doc); + } + { + Document doc = new Document(); + doc + .add(newField("field1", "twenty-one twenty-one", + TextField.TYPE_STORED)); // "twenty-one" in the index thrice + writer.addDocument(doc); + } + { + Document doc = new Document(); + doc.add(newField("field1", "twenty", TextField.TYPE_STORED)); // "twenty" + // in the + // index + // twice + writer.addDocument(doc); + } + writer.close(); searchers = Collections.synchronizedList(new ArrayList()); // create the spellChecker @@ -126,7 +149,8 @@ public class TestSpellChecker extends LuceneTestCase { SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator()); addwords(r, compareSP, "field3"); - String[] similar = compareSP.suggestSimilar("fvie", 2, r, "field3", false); + String[] similar = compareSP.suggestSimilar("fvie", 2, r, "field3", + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertTrue(similar.length == 2); //five and fvei have the same score, but different frequencies. assertEquals("fvei", similar[0]); @@ -143,14 +167,69 @@ public class TestSpellChecker extends LuceneTestCase { SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator()); addwords(r, compareSP, "field3"); - String[] similar = compareSP.suggestSimilar("fvie", 2, r, "bogusFieldBogusField", false); + String[] similar = compareSP.suggestSimilar("fvie", 2, r, + "bogusFieldBogusField", SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertEquals(0, similar.length); r.close(); if (!compareSP.isClosed()) compareSP.close(); compIdx.close(); } - + + public void testSuggestModes() throws Exception { + IndexReader r = IndexReader.open(userindex, true); + spellChecker.clearIndex(); + addwords(r, spellChecker, "field1"); + + { + String[] similar = spellChecker.suggestSimilar("eighty", 2, r, "field1", + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); + assertEquals(1, similar.length); + assertEquals("eighty", similar[0]); + } + + { + String[] similar = spellChecker.suggestSimilar("eight", 2, r, "field1", + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); + assertEquals(1, similar.length); + assertEquals("eight", similar[0]); + } + + { + String[] similar = spellChecker.suggestSimilar("eighty", 5, r, "field1", + SuggestMode.SUGGEST_MORE_POPULAR); + assertEquals(5, similar.length); + assertEquals("eight", similar[0]); + } + + { + String[] similar = spellChecker.suggestSimilar("twenty", 5, r, "field1", + SuggestMode.SUGGEST_MORE_POPULAR); + assertEquals(1, similar.length); + assertEquals("twenty-one", similar[0]); + } + + { + String[] similar = spellChecker.suggestSimilar("eight", 5, r, "field1", + SuggestMode.SUGGEST_MORE_POPULAR); + assertEquals(0, similar.length); + } + + { + String[] similar = spellChecker.suggestSimilar("eighty", 5, r, "field1", + SuggestMode.SUGGEST_ALWAYS); + assertEquals(5, similar.length); + assertEquals("eight", similar[0]); + } + + { + String[] similar = spellChecker.suggestSimilar("eight", 5, r, "field1", + SuggestMode.SUGGEST_ALWAYS); + assertEquals(5, similar.length); + assertEquals("eighty", similar[0]); + } + r.close(); + } private void checkCommonSuggestions(IndexReader r) throws IOException { String[] similar = spellChecker.suggestSimilar("fvie", 2); assertTrue(similar.length > 0); @@ -174,10 +253,12 @@ public class TestSpellChecker extends LuceneTestCase { assertEquals(similar[0], "five"); // test restraint to a field - similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false); + similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertEquals(0, similar.length); // there isn't the term thousand in the field field1 - similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false); + similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertEquals(1, similar.length); // there is the term thousand in the field field2 } @@ -214,10 +295,12 @@ public class TestSpellChecker extends LuceneTestCase { assertEquals(similar[0], "five"); // test restraint to a field - similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false); + similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertEquals(0, similar.length); // there isn't the term thousand in the field field1 - similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false); + similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); assertEquals(1, similar.length); // there is the term thousand in the field field2 similar = spellChecker.suggestSimilar("onety", 2); @@ -225,7 +308,8 @@ public class TestSpellChecker extends LuceneTestCase { assertEquals(similar[0], "ninety"); assertEquals(similar[1], "one"); try { - similar = spellChecker.suggestSimilar("tousand", 10, r, null, false); + similar = spellChecker.suggestSimilar("tousand", 10, r, null, + SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); } catch (NullPointerException e) { assertTrue("threw an NPE, and it shouldn't have", false); } diff --git a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java index 60ff32a5600..f4bd5c5babb 100644 --- a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.StringDistance; +import org.apache.lucene.search.spell.SuggestMode; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; import org.apache.lucene.search.spell.SuggestWordQueue; @@ -195,11 +196,11 @@ public class DirectSolrSpellChecker extends SolrSpellChecker { SpellingResult result = new SpellingResult(); float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; - + SuggestMode mode = options.onlyMorePopular ? SuggestMode.SUGGEST_MORE_POPULAR : SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; for (Token token : options.tokens) { Term term = new Term(field, token.toString()); SuggestWord[] suggestions = checker.suggestSimilar(term, - options.count, options.reader, options.onlyMorePopular, accuracy); + options.count, options.reader, mode, accuracy); result.addFrequency(token, options.reader.docFreq(term)); for (SuggestWord suggestion : suggestions) { result.add(token, suggestion.string, suggestion.freq);