Don't apply min frequency smoothing if suggest type is 'always'

Using an automatically detected 'min_doc_freq' when the suggest type is set to
'always' is counterintuitive. If we always suggest, ignore the frequency and
set the threshold frequency to 0 so that all possible candidates can be drawn,
provided they are within the given bounds.

Closes #3037
This commit is contained in:
Simon Willnauer 2013-05-15 15:13:59 +02:00
parent 48cb06c9cf
commit 8235b89e9c
3 changed files with 77 additions and 7 deletions

View File

@ -120,7 +120,7 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
Candidate original = set.originalTerm;
BytesRef term = preFilter(original.term, spare, byteSpare);
final long frequency = original.frequency;
spellchecker.setThresholdFrequency(thresholdFrequency(frequency, dictSize));
spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize));
SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
List<Candidate> candidates = new ArrayList<Candidate>(suggestSimilar.length);
for (int i = 0; i < suggestSimilar.length; i++) {
@ -175,7 +175,7 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
protected long thresholdFrequency(long termFrequency, long dictionarySize) {
if (termFrequency > 0) {
return (long) Math.round(termFrequency * (Math.log10(termFrequency - frequencyPlateau) * (1.0 / Math.log10(logBase))) + 1);
return (long) Math.max(0, Math.round(termFrequency * (Math.log10(termFrequency - frequencyPlateau) * (1.0 / Math.log10(logBase))) + 1));
}
return 0;

View File

@ -25,6 +25,7 @@ import java.util.Arrays;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.suggest.Suggest;
import org.hamcrest.Matcher;
/**
@ -73,6 +74,24 @@ public class ElasticsearchAssertions {
assertThat(resp.getHits().hits()[hit].getHighlightFields().get(field).fragments().length, greaterThan(fragment));
assertThat(resp.getHits().hits()[hit].highlightFields().get(field).fragments()[fragment].string(), matcher);
}
/**
 * Asserts that the entry at index {@code entry} of the suggestion named {@code key}
 * holds exactly {@code size} options.
 *
 * @param searchSuggest the suggest result to inspect; must not be {@code null}
 * @param entry         zero-based index of the entry to inspect
 * @param size          exact number of options expected for that entry
 * @param key           name of the suggestion to look up
 */
public static void assertSuggestionSize(Suggest searchSuggest, int entry, int size, String key) {
    assertThat(searchSuggest, notNullValue());
    assertThat(searchSuggest.size(), greaterThanOrEqualTo(1));
    assertThat(searchSuggest.getSuggestion(key).getName(), equalTo(key));
    // The entry list must be strictly larger than the index; otherwise get(entry) below
    // would throw IndexOutOfBoundsException instead of producing an assertion failure.
    assertThat(searchSuggest.getSuggestion(key).getEntries().size(), greaterThan(entry));
    assertThat(searchSuggest.getSuggestion(key).getEntries().get(entry).getOptions().size(), equalTo(size));
}
/**
 * Asserts that option {@code ord} of the entry at index {@code entry} in the suggestion
 * named {@code key} has the text {@code text}.
 *
 * @param searchSuggest the suggest result to inspect; must not be {@code null}
 * @param entry         zero-based index of the entry to inspect
 * @param ord           zero-based index of the option within that entry
 * @param key           name of the suggestion to look up
 * @param text          expected text of the selected option
 */
public static void assertSuggestion(Suggest searchSuggest, int entry, int ord, String key, String text) {
    assertThat(searchSuggest, notNullValue());
    assertThat(searchSuggest.size(), greaterThanOrEqualTo(1));
    assertThat(searchSuggest.getSuggestion(key).getName(), equalTo(key));
    // The entry list must be strictly larger than the index; otherwise get(entry) below
    // would throw IndexOutOfBoundsException instead of producing an assertion failure.
    assertThat(searchSuggest.getSuggestion(key).getEntries().size(), greaterThan(entry));
    assertThat(searchSuggest.getSuggestion(key).getEntries().get(entry).getOptions().size(), greaterThan(ord));
    assertThat(searchSuggest.getSuggestion(key).getEntries().get(entry).getOptions().get(ord).getText().string(), equalTo(text));
}
/*
* matchers

View File

@ -29,11 +29,11 @@ import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@ -42,18 +42,15 @@ import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.action.suggest.SuggestRequestBuilder;
import org.elasticsearch.action.suggest.SuggestResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.ImmutableSettings.Builder;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilder.SuggestionBuilder;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
@ -82,6 +79,60 @@ public class SuggestSearchTests extends AbstractNodesTests {
return client("server1");
}
@Test // see #3037
public void testSuggestModes() throws IOException {
    final String suggestionName = "did_you_mean";
    final String queryText = "ice tea";

    // Index settings: one shard, no replicas, plus a "biword" analyzer that
    // emits 2- and 3-word shingles after standard tokenization.
    Builder indexSettings = ImmutableSettings.builder();
    indexSettings.put("index.number_of_shards", 1).put("index.number_of_replicas", 0);
    indexSettings.put("index.analysis.analyzer.biword.tokenizer", "standard");
    indexSettings.putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase");
    indexSettings.put("index.analysis.filter.shingler.type", "shingle");
    indexSettings.put("index.analysis.filter.shingler.min_shingle_size", 2);
    indexSettings.put("index.analysis.filter.shingler.max_shingle_size", 3);

    // "name" is a multi_field: a plain string sub-field and a shingled sub-field.
    XContentBuilder typeMapping = XContentFactory.jsonBuilder()
            .startObject()
                .startObject("type1")
                    .startObject("properties")
                        .startObject("name")
                            .field("type", "multi_field")
                            .field("path", "just_name")
                            .startObject("fields")
                                .startObject("name")
                                    .field("type", "string")
                                .endObject()
                                .startObject("name_shingled")
                                    .field("type", "string")
                                    .field("index_analyzer", "biword")
                                    .field("search_analyzer", "standard")
                                .endObject()
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

    // Start from a clean slate, then create the index and wait until it is green.
    client.admin().indices().prepareDelete().execute().actionGet();
    client.admin().indices().prepareCreate("test").setSettings(indexSettings.build()).addMapping("type1", typeMapping).execute().actionGet();
    client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();

    // Index the sample documents in the same order as before and make them searchable.
    for (String name : new String[] { "I like iced tea", "I like tea.", "I like ice cream." }) {
        client.prepareIndex("test", "type1")
                .setSource(XContentFactory.jsonBuilder().startObject().field("name", name).endObject())
                .execute().actionGet();
    }
    client.admin().indices().prepareRefresh().execute().actionGet();

    // With suggest mode "always" the first option of the first entry must be "iced tea".
    Suggest alwaysModeResult = searchSuggest(
            client,
            queryText,
            phraseSuggestion(suggestionName).field("name_shingled")
                    .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
                    .gramSize(3));
    ElasticsearchAssertions.assertSuggestion(alwaysModeResult, 0, 0, suggestionName, "iced tea");

    // Without an explicit suggest mode the first entry must come back with no options.
    Suggest unspecifiedModeResult = searchSuggest(
            client,
            queryText,
            phraseSuggestion(suggestionName).field("name_shingled")
                    .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).maxEdits(2))
                    .gramSize(3));
    assertSuggestionSize(unspecifiedModeResult, 0, 0, suggestionName);
}
@Test // see #2729
public void testSizeOneShard() throws Exception {
client.admin().indices().prepareDelete().execute().actionGet();