WordBreakSpellChecker now correctly respects maxEvaluations (#12077)

This commit is contained in:
Chris Hostetter 2023-01-22 15:44:29 -07:00
parent 519adcc954
commit 9007f746a3
3 changed files with 44 additions and 12 deletions

View File

@ -252,6 +252,8 @@ Bug Fixes
* GITHUB#12084: Same bound with fallbackQuery. (Lu Xugang)
* GITHUB#12077: WordBreakSpellChecker now correctly respects maxEvaluations (hossman)
Optimizations
---------------------
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all

View File

@ -252,17 +252,21 @@ public class WordBreakSpellChecker {
if (useMinBreakWordLength < 1) {
useMinBreakWordLength = 1;
}
if (termLength < (useMinBreakWordLength * 2)) {
return 0;
return totalEvaluations;
}
int thisTimeEvaluations = 0;
for (int i = useMinBreakWordLength; i <= (termLength - useMinBreakWordLength); i++) {
if (totalEvaluations >= maxEvaluations) {
break;
}
totalEvaluations++;
int end = termText.offsetByCodePoints(0, i);
String leftText = termText.substring(0, end);
String rightText = termText.substring(end);
SuggestWord leftWord = generateSuggestWord(ir, term.field(), leftText);
if (leftWord.freq >= useMinSuggestionFrequency) {
SuggestWord rightWord = generateSuggestWord(ir, term.field(), rightText);
if (rightWord.freq >= useMinSuggestionFrequency) {
@ -275,7 +279,7 @@ public class WordBreakSpellChecker {
}
int newNumberBreaks = numberBreaks + 1;
if (newNumberBreaks <= maxChanges) {
int evaluations =
totalEvaluations =
generateBreakUpSuggestions(
new Term(term.field(), rightWord.string),
ir,
@ -286,17 +290,11 @@ public class WordBreakSpellChecker {
suggestions,
totalEvaluations,
sortMethod);
totalEvaluations += evaluations;
}
}
thisTimeEvaluations++;
totalEvaluations++;
if (totalEvaluations >= maxEvaluations) {
break;
}
}
return thisTimeEvaluations;
return totalEvaluations;
}
private SuggestWord[] newPrefix(SuggestWord[] oldPrefix, SuggestWord append) {

View File

@ -54,6 +54,11 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
writer.addDocument(doc);
}
{
Document doc = new Document();
doc.add(newTextField("abba", "A B AB ABA BAB", Field.Store.NO));
writer.addDocument(doc);
}
{
Document doc = new Document();
doc.add(newTextField("numbers", "thou hast sand betwixt thy toes", Field.Store.NO));
@ -80,6 +85,33 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
super.tearDown();
}
/**
 * Checks that {@code WordBreakSpellChecker.suggestWordBreaks} stays within the configured
 * evaluation budget.
 *
 * <p>The input term is {@code "ab"} repeated five times, looked up in the {@code "abba"} field.
 * With a minimum break word length of 1 and up to 10 changes allowed, the term can be split in
 * many ways, so the number of suggestions returned is used as an observable proxy for the number
 * of evaluations performed.
 *
 * @throws Exception if opening the reader or generating suggestions fails
 */
public void testMaxEvaluations() throws Exception {
  // Single source of truth for the budget: used both to configure the checker and to bound
  // the assertion below, so the two can never drift apart.
  final int maxEvals = 100;

  try (IndexReader ir = DirectoryReader.open(dir)) {
    WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
    wbsp.setMaxChanges(10);
    wbsp.setMinBreakWordLength(1);
    wbsp.setMinSuggestionFrequency(1);
    wbsp.setMaxEvaluations(maxEvals);

    Term term = new Term("abba", "ab".repeat(5));
    // NOTE(review): 500 is presumably the max-suggestions argument; it is deliberately larger
    // than maxEvals so that it is not the limit that keeps the result small -- confirm against
    // the suggestWordBreaks signature.
    SuggestWord[][] sw =
        wbsp.suggestWordBreaks(
            term,
            500,
            ir,
            SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
            BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);

    // sanity check that our suggester isn't completely broken
    assertThat(sw.length, org.hamcrest.Matchers.greaterThan(0));

    // if maxEvaluations is respected, we can't possibly have more suggestions than that.
    assertThat(sw.length, org.hamcrest.Matchers.lessThan(maxEvals));
  }
}
public void testCombiningWords() throws Exception {
IndexReader ir = DirectoryReader.open(dir);
WordBreakSpellChecker wbsp = new WordBreakSpellChecker();