LUCENE-9102: Add maxQueryLength option to DirectSpellChecker.

Closes #1103
This commit is contained in:
Andy Webb 2019-12-23 11:41:56 +01:00 committed by Bruno Roustant
parent db2b21a169
commit 45dce34316
No known key found for this signature in database
GPG Key ID: CD28DABB95360525
2 changed files with 31 additions and 1 deletions

View File

@ -75,6 +75,8 @@ public class DirectSpellChecker {
private float thresholdFrequency = 0f;
/** minimum length of a query word to return suggestions; the length is
 * counted in Unicode code points, not in {@code char}s */
private int minQueryLength = 4;
/** maximum length of a query word to return suggestions; the length is
 * counted in Unicode code points, not in {@code char}s. The default of
 * {@link Integer#MAX_VALUE} means no upper limit applies until one is set. */
private int maxQueryLength = Integer.MAX_VALUE;
/** value in [0..1] (or absolute number >= 1) representing the maximum
* number of documents (of the total) a query term can appear in to
* be corrected. */
@ -195,9 +197,27 @@ public class DirectSpellChecker {
* metric.
*/
public void setMinQueryLength(int minQueryLength) {
  // The lower bound may never exceed the currently configured upper bound;
  // reject the value before touching any state.
  final boolean exceedsUpperBound = minQueryLength > this.maxQueryLength;
  if (exceedsUpperBound) {
    throw new IllegalArgumentException("minQueryLength must not be greater than maxQueryLength");
  }
  this.minQueryLength = minQueryLength;
}
/**
 * Get the maximum length of a query term to return suggestions.
 *
 * @return the currently configured maximum query term length
 */
public int getMaxQueryLength() {
  return this.maxQueryLength;
}
/**
 * Set the maximum length of a query term to return suggestions.
 * <p>
 * Long queries can be expensive to process and may trigger exceptions, so
 * terms longer than this limit are not spellchecked.
 *
 * @param maxQueryLength the new upper bound on query term length
 * @throws IllegalArgumentException if {@code maxQueryLength} is smaller than
 *         the currently configured minimum query length
 */
public void setMaxQueryLength(int maxQueryLength) {
  // The upper bound may never drop below the currently configured lower
  // bound; reject the value before touching any state.
  final boolean belowLowerBound = maxQueryLength < this.minQueryLength;
  if (belowLowerBound) {
    throw new IllegalArgumentException("maxQueryLength must not be smaller than minQueryLength");
  }
  this.maxQueryLength = maxQueryLength;
}
/**
* Get the maximum threshold of documents a query term can appear in order
* to provide suggestions.
@ -317,7 +337,9 @@ public class DirectSpellChecker {
SuggestMode suggestMode, float accuracy) throws IOException {
final CharsRefBuilder spare = new CharsRefBuilder();
String text = term.text();
if (minQueryLength > 0 && text.codePointCount(0, text.length()) < minQueryLength)
int textLength = text.codePointCount(0, text.length());
if (textLength < minQueryLength || textLength > maxQueryLength)
return new SuggestWord[0];
if (lowerCaseTerms) {

View File

@ -147,11 +147,19 @@ public class TestDirectSpellChecker extends LuceneTestCase {
"fobar"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length);
// confirm that a term shorter than minQueryLength is not spellchecked
spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setMinQueryLength(5);
similar = spellChecker.suggestSimilar(new Term("text", "foba"), 1, ir,
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length);
// confirm that a term longer than maxQueryLength is not spellchecked
spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setMaxQueryLength(5);
similar = spellChecker.suggestSimilar(new Term("text", "foobrr"), 1, ir,
SuggestMode.SUGGEST_MORE_POPULAR);
assertEquals(0, similar.length);
spellChecker = new DirectSpellChecker(); // reset defaults
spellChecker.setMaxEdits(1);