more_like_this query to throw an error if the like fields is not provided (#40632)

With the removal of the `_all` field the `mlt` query cannot infer a field name
to use to analyze the provided (un)like text if the `fields` parameter is not
explicitly set in the query and the `index.query.default_field` is not changed
in the index settings (by default it is set to `*`). For this reason the like text
is ignored and queries are only built from the provided document ids.
This change fixes this bug by throwing an error if the fields option is not set
and the `index.query.default_field` is equals to `*`. The error is thrown only
if like or unlike texts are provided in the query.
This commit is contained in:
Jim Ferenczi 2019-04-18 22:29:36 +02:00 committed by jimczi
parent 1f5cd410b8
commit 068f8ba223
3 changed files with 56 additions and 7 deletions

View File

@ -1050,6 +1050,13 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
List<String> moreLikeFields = new ArrayList<>();
if (useDefaultField) {
moreLikeFields = context.defaultFields();
if (moreLikeFields.size() == 1
&& moreLikeFields.get(0).equals("*")
&& (likeTexts.length > 0 || unlikeTexts.length > 0)) {
throw new IllegalArgumentException("[more_like_this] query cannot infer the field to analyze the free text, " +
"you should update the [index.query.default_field] index setting to a field that exists in the mapping or " +
"set the [fields] option in the query.");
}
} else {
for (String field : fields) {
MappedFieldType fieldType = context.fieldMapper(field);

View File

@ -31,10 +31,12 @@ import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
import org.elasticsearch.action.termvectors.MultiTermVectorsResponse;
import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.action.termvectors.TermVectorsResponse;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
@ -160,13 +162,13 @@ public class MoreLikeThisQueryBuilderTests extends AbstractQueryTestCase<MoreLik
} else {
likeItems = randomLikeItems;
}
if (randomBoolean()) { // for the default field
queryBuilder = new MoreLikeThisQueryBuilder(likeTexts, likeItems);
if (randomBoolean() && likeItems != null && likeItems.length > 0) { // for the default field
queryBuilder = new MoreLikeThisQueryBuilder(null, likeItems);
} else {
queryBuilder = new MoreLikeThisQueryBuilder(randomFields, likeTexts, likeItems);
}
if (randomBoolean()) {
if (randomBoolean() && queryBuilder.fields() != null) {
queryBuilder.unlike(generateRandomStringArray(5, 5, false, false));
}
if (randomBoolean()) {
@ -305,6 +307,39 @@ public class MoreLikeThisQueryBuilderTests extends AbstractQueryTestCase<MoreLik
assertThat(e.getMessage(), containsString("more_like_this only supports text/keyword fields"));
}
public void testDefaultField() throws IOException {
QueryShardContext context = createShardContext();
{
MoreLikeThisQueryBuilder builder =
new MoreLikeThisQueryBuilder(new String[]{"hello world"}, null);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> builder.toQuery(context));
assertThat(e.getMessage(), containsString("[more_like_this] query cannot infer"));
}
{
context.getIndexSettings().updateIndexMetaData(
newIndexMeta("index",
context.getIndexSettings().getSettings(),
Settings.builder().putList("index.query.default_field", STRING_FIELD_NAME).build()
)
);
try {
MoreLikeThisQueryBuilder builder = new MoreLikeThisQueryBuilder(new String[]{"hello world"}, null);
builder.toQuery(context);
} finally {
// Reset the default value
context.getIndexSettings().updateIndexMetaData(
newIndexMeta("index",
context.getIndexSettings().getSettings(),
Settings.builder().putList("index.query.default_field", "*").build()
)
);
}
}
}
public void testMoreLikeThisBuilder() throws Exception {
Query parsedQuery =
parseQuery(moreLikeThisQuery(new String[]{"name.first", "name.last"}, new String[]{"something"}, null)
@ -390,4 +425,11 @@ public class MoreLikeThisQueryBuilderTests extends AbstractQueryTestCase<MoreLik
}
return query;
}
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
Settings build = Settings.builder().put(oldIndexSettings)
.put(indexSettings)
.build();
return IndexMetaData.builder(name).settings(build).build();
}
}

View File

@ -343,10 +343,10 @@ public class MoreLikeThisIT extends ESIntegTestCase {
new MoreLikeThisQueryBuilder(new String[] {"string_value", "int_value"}, null,
new Item[] {new Item("test", "1")}).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class);
// mlt query with no field -> No results (because _all is not enabled)
searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"index"}).minTermFreq(1).minDocFreq(1))
.get();
assertHitCount(searchResponse, 0L);
// mlt query with no field -> exception because _all is not enabled)
assertThrows(client().prepareSearch()
.setQuery(moreLikeThisQuery(new String[] {"index"}).minTermFreq(1).minDocFreq(1)),
SearchPhaseExecutionException.class);
// mlt query with string fields
searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[]{"string_value"}, new String[] {"index"}, null)