Handle min_doc_freq in phrase suggester (#40840)
The phrase suggesters have an option to remove terms that have a frequency lower than a provided min_doc_freq. However this value is overwritten by the frequency of the original term in the popular mode. This change ensures that we keep the maximum value between the provided min_doc_value and the original term frequency as a threshold to select candidates. Fixes #16764
This commit is contained in:
parent
fb5d7cf237
commit
bc0fe7d64d
|
@ -132,31 +132,41 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
|
||||
Candidate original = set.originalTerm;
|
||||
BytesRef term = preFilter(original.term, spare, byteSpare);
|
||||
if (suggestMode != SuggestMode.SUGGEST_ALWAYS) {
|
||||
/**
|
||||
* We use the {@link TermStats#docFreq} to compute the frequency threshold
|
||||
* because that's what {@link DirectSpellChecker#suggestSimilar} expects
|
||||
* when filtering terms.
|
||||
*/
|
||||
int threshold = thresholdTermFrequency(original.termStats.docFreq);
|
||||
if (threshold == Integer.MAX_VALUE) {
|
||||
// the threshold is the max possible frequency so we can skip the search
|
||||
return set;
|
||||
float origThreshold = spellchecker.getThresholdFrequency();
|
||||
try {
|
||||
if (suggestMode != SuggestMode.SUGGEST_ALWAYS) {
|
||||
/**
|
||||
* We use the {@link TermStats#docFreq} to compute the frequency threshold
|
||||
* because that's what {@link DirectSpellChecker#suggestSimilar} expects
|
||||
* when filtering terms.
|
||||
*/
|
||||
int threshold = thresholdTermFrequency(original.termStats.docFreq);
|
||||
if (threshold == Integer.MAX_VALUE) {
|
||||
// the threshold is the max possible frequency so we can skip the search
|
||||
return set;
|
||||
}
|
||||
// don't override the threshold if the provided min_doc_freq is greater
|
||||
// than the original term frequency.
|
||||
if (spellchecker.getThresholdFrequency() < threshold) {
|
||||
spellchecker.setThresholdFrequency(threshold);
|
||||
}
|
||||
}
|
||||
spellchecker.setThresholdFrequency(threshold);
|
||||
}
|
||||
|
||||
SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
|
||||
List<Candidate> candidates = new ArrayList<>(suggestSimilar.length);
|
||||
for (int i = 0; i < suggestSimilar.length; i++) {
|
||||
SuggestWord suggestWord = suggestSimilar[i];
|
||||
BytesRef candidate = new BytesRef(suggestWord.string);
|
||||
TermStats termStats = internalTermStats(candidate);
|
||||
postFilter(new Candidate(candidate, termStats,
|
||||
suggestWord.score, score(termStats, suggestWord.score, sumTotalTermFreq), false), spare, byteSpare, candidates);
|
||||
SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
|
||||
List<Candidate> candidates = new ArrayList<>(suggestSimilar.length);
|
||||
for (int i = 0; i < suggestSimilar.length; i++) {
|
||||
SuggestWord suggestWord = suggestSimilar[i];
|
||||
BytesRef candidate = new BytesRef(suggestWord.string);
|
||||
TermStats termStats = internalTermStats(candidate);
|
||||
postFilter(new Candidate(candidate, termStats,
|
||||
suggestWord.score, score(termStats, suggestWord.score, sumTotalTermFreq), false), spare, byteSpare, candidates);
|
||||
}
|
||||
set.addCandidates(candidates);
|
||||
return set;
|
||||
} finally {
|
||||
// restore the original value back
|
||||
spellchecker.setThresholdFrequency(origThreshold);
|
||||
}
|
||||
set.addCandidates(candidates);
|
||||
return set;
|
||||
}
|
||||
|
||||
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare) throws IOException {
|
||||
|
|
|
@ -1005,6 +1005,52 @@ public class SuggestSearchIT extends ESIntegTestCase {
|
|||
assertSuggestion(searchSuggest, 0, "suggestion", "apple");
|
||||
}
|
||||
|
||||
public void testPhraseSuggestMinDocFreq() throws Exception {
|
||||
XContentBuilder mapping = XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("text")
|
||||
.field("type", "keyword")
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject();
|
||||
assertAcked(prepareCreate("test")
|
||||
.setSettings(Settings.builder().put("index.number_of_shards", 1).build())
|
||||
.addMapping("type", mapping));
|
||||
|
||||
List<IndexRequestBuilder> builders = new ArrayList<>();
|
||||
builders.add(client().prepareIndex("test", "type").setSource("text", "apple"));
|
||||
builders.add(client().prepareIndex("test", "type").setSource("text", "apple"));
|
||||
builders.add(client().prepareIndex("test", "type").setSource("text", "apple"));
|
||||
builders.add(client().prepareIndex("test", "type").setSource("text", "appfle"));
|
||||
indexRandom(true, false, builders);
|
||||
|
||||
PhraseSuggestionBuilder phraseSuggest = phraseSuggestion("text").text("appple")
|
||||
.size(2)
|
||||
.addCandidateGenerator(new DirectCandidateGeneratorBuilder("text")
|
||||
.suggestMode("popular"));
|
||||
|
||||
Suggest searchSuggest = searchSuggest("suggestion", phraseSuggest);
|
||||
assertSuggestion(searchSuggest, 0, "suggestion", 2, "apple", "appfle");
|
||||
|
||||
phraseSuggest = phraseSuggestion("text").text("appple")
|
||||
.addCandidateGenerator(new DirectCandidateGeneratorBuilder("text")
|
||||
.suggestMode("popular")
|
||||
.minDocFreq(2));
|
||||
|
||||
searchSuggest = searchSuggest("suggestion", phraseSuggest);
|
||||
assertSuggestion(searchSuggest, 0, "suggestion", 1,"apple");
|
||||
|
||||
phraseSuggest = phraseSuggestion("text").text("appple")
|
||||
.addCandidateGenerator(new DirectCandidateGeneratorBuilder("text")
|
||||
.suggestMode("popular")
|
||||
.minDocFreq(2));
|
||||
searchSuggest = searchSuggest("suggestion", phraseSuggest);
|
||||
assertSuggestion(searchSuggest, 0, "suggestion", 1,"apple");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||
return Collections.singleton(DummyTemplatePlugin.class);
|
||||
|
|
Loading…
Reference in New Issue