mirror of https://github.com/apache/lucene.git
WordBreakSpellChecker now correctly respects maxEvaluations (#12077)
This commit is contained in:
parent
519adcc954
commit
9007f746a3
|
@ -252,6 +252,8 @@ Bug Fixes
|
|||
|
||||
* GITHUB#12084: Same bound with fallbackQuery. (Lu Xugang)
|
||||
|
||||
* GITHUB#12077: WordBreakSpellChecker now correctly respects maxEvaluations (hossman)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
|
||||
|
|
|
@ -252,17 +252,21 @@ public class WordBreakSpellChecker {
|
|||
if (useMinBreakWordLength < 1) {
|
||||
useMinBreakWordLength = 1;
|
||||
}
|
||||
|
||||
if (termLength < (useMinBreakWordLength * 2)) {
|
||||
return 0;
|
||||
return totalEvaluations;
|
||||
}
|
||||
|
||||
int thisTimeEvaluations = 0;
|
||||
for (int i = useMinBreakWordLength; i <= (termLength - useMinBreakWordLength); i++) {
|
||||
if (totalEvaluations >= maxEvaluations) {
|
||||
break;
|
||||
}
|
||||
totalEvaluations++;
|
||||
|
||||
int end = termText.offsetByCodePoints(0, i);
|
||||
String leftText = termText.substring(0, end);
|
||||
String rightText = termText.substring(end);
|
||||
SuggestWord leftWord = generateSuggestWord(ir, term.field(), leftText);
|
||||
|
||||
if (leftWord.freq >= useMinSuggestionFrequency) {
|
||||
SuggestWord rightWord = generateSuggestWord(ir, term.field(), rightText);
|
||||
if (rightWord.freq >= useMinSuggestionFrequency) {
|
||||
|
@ -275,7 +279,7 @@ public class WordBreakSpellChecker {
|
|||
}
|
||||
int newNumberBreaks = numberBreaks + 1;
|
||||
if (newNumberBreaks <= maxChanges) {
|
||||
int evaluations =
|
||||
totalEvaluations =
|
||||
generateBreakUpSuggestions(
|
||||
new Term(term.field(), rightWord.string),
|
||||
ir,
|
||||
|
@ -286,17 +290,11 @@ public class WordBreakSpellChecker {
|
|||
suggestions,
|
||||
totalEvaluations,
|
||||
sortMethod);
|
||||
totalEvaluations += evaluations;
|
||||
}
|
||||
}
|
||||
|
||||
thisTimeEvaluations++;
|
||||
totalEvaluations++;
|
||||
if (totalEvaluations >= maxEvaluations) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return thisTimeEvaluations;
|
||||
|
||||
return totalEvaluations;
|
||||
}
|
||||
|
||||
private SuggestWord[] newPrefix(SuggestWord[] oldPrefix, SuggestWord append) {
|
||||
|
|
|
@ -54,6 +54,11 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
|||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
{
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("abba", "A B AB ABA BAB", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
{
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("numbers", "thou hast sand betwixt thy toes", Field.Store.NO));
|
||||
|
@ -80,6 +85,33 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
|||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testMaxEvaluations() throws Exception {
|
||||
final int maxEvals = 100;
|
||||
|
||||
try (IndexReader ir = DirectoryReader.open(dir)) {
|
||||
WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
|
||||
wbsp.setMaxChanges(10);
|
||||
wbsp.setMinBreakWordLength(1);
|
||||
wbsp.setMinSuggestionFrequency(1);
|
||||
wbsp.setMaxEvaluations(100);
|
||||
|
||||
Term term = new Term("abba", "ab".repeat(5));
|
||||
SuggestWord[][] sw =
|
||||
wbsp.suggestWordBreaks(
|
||||
term,
|
||||
500,
|
||||
ir,
|
||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||
|
||||
// sanity check that our suggester isn't completely broken
|
||||
assertThat(sw.length, org.hamcrest.Matchers.greaterThan(0));
|
||||
|
||||
// if maxEvaluations is respected, we can't possibly have more suggestions then that.
|
||||
assertThat(sw.length, org.hamcrest.Matchers.lessThan(maxEvals));
|
||||
}
|
||||
}
|
||||
|
||||
public void testCombiningWords() throws Exception {
|
||||
IndexReader ir = DirectoryReader.open(dir);
|
||||
WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
|
||||
|
|
Loading…
Reference in New Issue