Throw IAE if suggest results return differently sized results.

If the term suggester is used the results are merged depending on
the number of terms produced by the tokenizer / tokenfilter. If a
term suggester is executed across multiple indices that share the
same field but with different analysis chains we can't merge the
result anymore sicne tokens are our of order or have a different size.

This commit throws ESIllegalArgumentException if the number of entries
are not the same across all results.

Closes #3196
This commit is contained in:
Simon Willnauer 2013-12-15 20:46:02 +01:00
parent 2f2b95a6b8
commit 3e321972cc
2 changed files with 112 additions and 9 deletions

View File

@ -20,6 +20,7 @@ package org.elasticsearch.search.suggest;
import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.StreamOutput;
@ -250,7 +251,11 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
List<T> currentEntries = new ArrayList<T>(); List<T> currentEntries = new ArrayList<T>();
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
for (Suggestion<T> suggestion : toReduce) { for (Suggestion<T> suggestion : toReduce) {
assert suggestion.entries.size() == size; if(suggestion.entries.size() != size) {
throw new ElasticSearchIllegalStateException("Can't merge suggest result, this might be caused by suggest calls " +
"across multiple indices with different analysis chains. Suggest entries have different sizes actual [" +
suggestion.entries.size() + "] expected [" + size +"]");
}
assert suggestion.name.equals(leader.name); assert suggestion.name.equals(leader.name);
currentEntries.add(suggestion.entries.get(i)); currentEntries.add(suggestion.entries.get(i));
} }
@ -361,14 +366,18 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
CollectionUtil.timSort(options, comparator); CollectionUtil.timSort(options, comparator);
} }
protected Entry<O> reduce(List<? extends Entry<O>> toReduce) { protected <T extends Entry<O>> Entry<O> reduce(List<T> toReduce) {
if (toReduce.size() == 1) { if (toReduce.size() == 1) {
return toReduce.get(0); return toReduce.get(0);
} }
final Map<O, O> entries = new HashMap<O, O>(); final Map<O, O> entries = new HashMap<O, O>();
Entry<O> leader = toReduce.get(0); Entry<O> leader = toReduce.get(0);
for (Entry<O> entry : toReduce) { for (Entry<O> entry : toReduce) {
assert leader.text.equals(entry.text); if (!leader.text.equals(entry.text)) {
throw new ElasticSearchIllegalStateException("Can't merge suggest entries, this might be caused by suggest calls " +
"across multiple indices with different analysis chains. Suggest entries have different text actual [" +
entry.text + "] expected [" + leader.text +"]");
}
assert leader.offset == entry.offset; assert leader.offset == entry.offset;
assert leader.length == entry.length; assert leader.length == entry.length;
leader.merge(entry); leader.merge(entry);

View File

@ -23,11 +23,9 @@ import com.google.common.base.Charsets;
import com.google.common.io.Resources; import com.google.common.io.Resources;
import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.Slow;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.*;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client; import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
@ -51,8 +49,7 @@ import static org.elasticsearch.search.suggest.SuggestBuilder.phraseSuggestion;
import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion; import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion;
import static org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.candidateGenerator; import static org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.candidateGenerator;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.*;
import static org.hamcrest.Matchers.nullValue;
/** /**
* Integration tests for term and phrase suggestions. Many of these tests many requests that vary only slightly from one another. Where * Integration tests for term and phrase suggestions. Many of these tests many requests that vary only slightly from one another. Where
@ -60,6 +57,103 @@ import static org.hamcrest.Matchers.nullValue;
* request, modify again, request again, etc. This makes it very obvious what changes between requests. * request, modify again, request again, etc. This makes it very obvious what changes between requests.
*/ */
public class SuggestSearchTests extends ElasticsearchIntegrationTest { public class SuggestSearchTests extends ElasticsearchIntegrationTest {
@Test // see #3196
public void testSuggestAcrossMultipleIndices() throws IOException {
prepareCreate("test").setSettings(
SETTING_NUMBER_OF_SHARDS, between(1, 5),
SETTING_NUMBER_OF_REPLICAS, between(0, 1)).get();
ensureGreen();
index("test", "type1", "1", "text", "abcd");
index("test", "type1", "2", "text", "aacd");
index("test", "type1", "3", "text", "abbd");
index("test", "type1", "4", "text", "abcc");
refresh();
TermSuggestionBuilder termSuggest = termSuggestion("test")
.suggestMode("always") // Always, otherwise the results can vary between requests.
.text("abcd")
.field("text");
logger.info("--> run suggestions with one index");
searchSuggest(client(), termSuggest);
prepareCreate("test_1").setSettings(
SETTING_NUMBER_OF_SHARDS, between(1, 5),
SETTING_NUMBER_OF_REPLICAS, between(0, 1)).get();
ensureGreen();
index("test_1", "type1", "1", "text", "ab cd");
index("test_1", "type1", "2", "text", "aa cd");
index("test_1", "type1", "3", "text", "ab bd");
index("test_1", "type1", "4", "text", "ab cc");
refresh();
termSuggest = termSuggestion("test")
.suggestMode("always") // Always, otherwise the results can vary between requests.
.text("ab cd")
.minWordLength(1)
.field("text");
logger.info("--> run suggestions with two indices");
searchSuggest(client(), termSuggest);
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties")
.startObject("text").field("type", "string").field("analyzer", "keyword").endObject()
.endObject()
.endObject().endObject();
assertAcked(prepareCreate("test_2").setSettings(settingsBuilder()
.put(SETTING_NUMBER_OF_SHARDS, between(1, 5))
.put(SETTING_NUMBER_OF_REPLICAS, between(0, 1))
).addMapping("type1", mapping));
ensureGreen();
index("test_2", "type1", "1", "text", "ab cd");
index("test_2", "type1", "2", "text", "aa cd");
index("test_2", "type1", "3", "text", "ab bd");
index("test_2", "type1", "4", "text", "ab cc");
index("test_2", "type1", "1", "text", "abcd");
index("test_2", "type1", "2", "text", "aacd");
index("test_2", "type1", "3", "text", "abbd");
index("test_2", "type1", "4", "text", "abcc");
refresh();
termSuggest = termSuggestion("test")
.suggestMode("always") // Always, otherwise the results can vary between requests.
.text("ab cd")
.minWordLength(1)
.field("text");
logger.info("--> run suggestions with three indices");
try {
searchSuggest(client(), termSuggest);
fail(" can not suggest across multiple indices with different analysis chains");
} catch (ReduceSearchPhaseException ex) {
assertThat(ex.getCause(), instanceOf(ElasticSearchIllegalStateException.class));
assertThat(ex.getCause().getMessage(), endsWith("Suggest entries have different sizes actual [1] expected [2]"));
} catch (ElasticSearchIllegalStateException ex) {
assertThat(ex.getMessage(), endsWith("Suggest entries have different sizes actual [1] expected [2]"));
}
termSuggest = termSuggestion("test")
.suggestMode("always") // Always, otherwise the results can vary between requests.
.text("ABCD")
.minWordLength(1)
.field("text");
logger.info("--> run suggestions with four indices");
try {
searchSuggest(client(), termSuggest);
fail(" can not suggest across multiple indices with different analysis chains");
} catch (ReduceSearchPhaseException ex) {
assertThat(ex.getCause(), instanceOf(ElasticSearchIllegalStateException.class));
assertThat(ex.getCause().getMessage(), endsWith("Suggest entries have different text actual [ABCD] expected [abcd]"));
} catch (ElasticSearchIllegalStateException ex) {
assertThat(ex.getMessage(), endsWith("Suggest entries have different text actual [ABCD] expected [abcd]"));
}
}
@Test // see #3037 @Test // see #3037
public void testSuggestModes() throws IOException { public void testSuggestModes() throws IOException {
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder()