mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-31 04:18:39 +00:00
Handle min_doc_freq in phrase suggester (#40840)
The phrase suggesters have an option to remove terms that have a frequency lower than a provided min_doc_freq. However, in the popular suggest mode this value is overwritten by the frequency of the original term. This change ensures that we keep the maximum of the provided min_doc_freq and the original term frequency as the threshold to select candidates. Fixes #16764
This commit is contained in:
parent
fb5d7cf237
commit
bc0fe7d64d
server/src
main/java/org/elasticsearch/search/suggest/phrase
test/java/org/elasticsearch/search/suggest
@ -132,31 +132,41 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
|||||||
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
|
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
|
||||||
Candidate original = set.originalTerm;
|
Candidate original = set.originalTerm;
|
||||||
BytesRef term = preFilter(original.term, spare, byteSpare);
|
BytesRef term = preFilter(original.term, spare, byteSpare);
|
||||||
if (suggestMode != SuggestMode.SUGGEST_ALWAYS) {
|
float origThreshold = spellchecker.getThresholdFrequency();
|
||||||
/**
|
try {
|
||||||
* We use the {@link TermStats#docFreq} to compute the frequency threshold
|
if (suggestMode != SuggestMode.SUGGEST_ALWAYS) {
|
||||||
* because that's what {@link DirectSpellChecker#suggestSimilar} expects
|
/**
|
||||||
* when filtering terms.
|
* We use the {@link TermStats#docFreq} to compute the frequency threshold
|
||||||
*/
|
* because that's what {@link DirectSpellChecker#suggestSimilar} expects
|
||||||
int threshold = thresholdTermFrequency(original.termStats.docFreq);
|
* when filtering terms.
|
||||||
if (threshold == Integer.MAX_VALUE) {
|
*/
|
||||||
// the threshold is the max possible frequency so we can skip the search
|
int threshold = thresholdTermFrequency(original.termStats.docFreq);
|
||||||
return set;
|
if (threshold == Integer.MAX_VALUE) {
|
||||||
|
// the threshold is the max possible frequency so we can skip the search
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
// don't override the threshold if the provided min_doc_freq is greater
|
||||||
|
// than the original term frequency.
|
||||||
|
if (spellchecker.getThresholdFrequency() < threshold) {
|
||||||
|
spellchecker.setThresholdFrequency(threshold);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
spellchecker.setThresholdFrequency(threshold);
|
|
||||||
}
|
|
||||||
|
|
||||||
SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
|
SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
|
||||||
List<Candidate> candidates = new ArrayList<>(suggestSimilar.length);
|
List<Candidate> candidates = new ArrayList<>(suggestSimilar.length);
|
||||||
for (int i = 0; i < suggestSimilar.length; i++) {
|
for (int i = 0; i < suggestSimilar.length; i++) {
|
||||||
SuggestWord suggestWord = suggestSimilar[i];
|
SuggestWord suggestWord = suggestSimilar[i];
|
||||||
BytesRef candidate = new BytesRef(suggestWord.string);
|
BytesRef candidate = new BytesRef(suggestWord.string);
|
||||||
TermStats termStats = internalTermStats(candidate);
|
TermStats termStats = internalTermStats(candidate);
|
||||||
postFilter(new Candidate(candidate, termStats,
|
postFilter(new Candidate(candidate, termStats,
|
||||||
suggestWord.score, score(termStats, suggestWord.score, sumTotalTermFreq), false), spare, byteSpare, candidates);
|
suggestWord.score, score(termStats, suggestWord.score, sumTotalTermFreq), false), spare, byteSpare, candidates);
|
||||||
|
}
|
||||||
|
set.addCandidates(candidates);
|
||||||
|
return set;
|
||||||
|
} finally {
|
||||||
|
// restore the original value back
|
||||||
|
spellchecker.setThresholdFrequency(origThreshold);
|
||||||
}
|
}
|
||||||
set.addCandidates(candidates);
|
|
||||||
return set;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare) throws IOException {
|
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare) throws IOException {
|
||||||
|
@ -1005,6 +1005,52 @@ public class SuggestSearchIT extends ESIntegTestCase {
|
|||||||
assertSuggestion(searchSuggest, 0, "suggestion", "apple");
|
assertSuggestion(searchSuggest, 0, "suggestion", "apple");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPhraseSuggestMinDocFreq() throws Exception {
|
||||||
|
XContentBuilder mapping = XContentFactory.jsonBuilder()
|
||||||
|
.startObject()
|
||||||
|
.startObject("type")
|
||||||
|
.startObject("properties")
|
||||||
|
.startObject("text")
|
||||||
|
.field("type", "keyword")
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject();
|
||||||
|
assertAcked(prepareCreate("test")
|
||||||
|
.setSettings(Settings.builder().put("index.number_of_shards", 1).build())
|
||||||
|
.addMapping("type", mapping));
|
||||||
|
|
||||||
|
List<IndexRequestBuilder> builders = new ArrayList<>();
|
||||||
|
builders.add(client().prepareIndex("test", "type").setSource("text", "apple"));
|
||||||
|
builders.add(client().prepareIndex("test", "type").setSource("text", "apple"));
|
||||||
|
builders.add(client().prepareIndex("test", "type").setSource("text", "apple"));
|
||||||
|
builders.add(client().prepareIndex("test", "type").setSource("text", "appfle"));
|
||||||
|
indexRandom(true, false, builders);
|
||||||
|
|
||||||
|
PhraseSuggestionBuilder phraseSuggest = phraseSuggestion("text").text("appple")
|
||||||
|
.size(2)
|
||||||
|
.addCandidateGenerator(new DirectCandidateGeneratorBuilder("text")
|
||||||
|
.suggestMode("popular"));
|
||||||
|
|
||||||
|
Suggest searchSuggest = searchSuggest("suggestion", phraseSuggest);
|
||||||
|
assertSuggestion(searchSuggest, 0, "suggestion", 2, "apple", "appfle");
|
||||||
|
|
||||||
|
phraseSuggest = phraseSuggestion("text").text("appple")
|
||||||
|
.addCandidateGenerator(new DirectCandidateGeneratorBuilder("text")
|
||||||
|
.suggestMode("popular")
|
||||||
|
.minDocFreq(2));
|
||||||
|
|
||||||
|
searchSuggest = searchSuggest("suggestion", phraseSuggest);
|
||||||
|
assertSuggestion(searchSuggest, 0, "suggestion", 1,"apple");
|
||||||
|
|
||||||
|
phraseSuggest = phraseSuggestion("text").text("appple")
|
||||||
|
.addCandidateGenerator(new DirectCandidateGeneratorBuilder("text")
|
||||||
|
.suggestMode("popular")
|
||||||
|
.minDocFreq(2));
|
||||||
|
searchSuggest = searchSuggest("suggestion", phraseSuggest);
|
||||||
|
assertSuggestion(searchSuggest, 0, "suggestion", 1,"apple");
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||||
return Collections.singleton(DummyTemplatePlugin.class);
|
return Collections.singleton(DummyTemplatePlugin.class);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user