diff --git a/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java index 1eefbb158a6..ffb7e9d607f 100644 --- a/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java @@ -391,7 +391,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder { .queryName(queryName); } - private static List parseValues(XContentParser parser) throws IOException { + static List parseValues(XContentParser parser) throws IOException { List values = new ArrayList<>(); while (parser.nextToken() != XContentParser.Token.END_ARRAY) { Object value = parser.objectBytes(); diff --git a/core/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java new file mode 100644 index 00000000000..0947a67212d --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java @@ -0,0 +1,369 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.index.query; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CoveringQuery; +import org.apache.lucene.search.DoubleValues; +import org.apache.lucene.search.LongValues; +import org.apache.lucene.search.LongValuesSource; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.SearchScript; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * Builder for the {@code terms_set} query. The query matches documents that contain at least a required number of the + * supplied terms, where that number is taken per document either from a field (see + * {@link #setMinimumShouldMatchField(String)}) or from a script (see {@link #setMinimumShouldMatchScript(Script)}). + * Only one of the two may be set. + */ +public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQueryBuilder> { + + public static final String NAME = "terms_set"; + + static final ParseField TERMS_FIELD = new ParseField("terms"); + static final ParseField MINIMUM_SHOULD_MATCH_FIELD = new ParseField("minimum_should_match_field"); + static final ParseField MINIMUM_SHOULD_MATCH_SCRIPT = new ParseField("minimum_should_match_script"); + + private final String fieldName; + private final List<?> values; + + private String minimumShouldMatchField; + private Script minimumShouldMatchScript; + + /** + * @param fieldName the field the terms are matched against, must not be {@code null} + * @param values the terms to look for, must not be {@code null} + */ + public TermsSetQueryBuilder(String 
fieldName, List<?> values) { + this.fieldName = Objects.requireNonNull(fieldName); + this.values = TermsQueryBuilder.convert(Objects.requireNonNull(values)); + } + + /** + * Reads the builder from a stream, in the same order {@link #doWriteTo(StreamOutput)} writes it. + */ + public TermsSetQueryBuilder(StreamInput in) throws IOException { + super(in); + this.fieldName = in.readString(); + this.values = (List<?>) in.readGenericValue(); + this.minimumShouldMatchField = in.readOptionalString(); + this.minimumShouldMatchScript = in.readOptionalWriteable(Script::new); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeString(fieldName); + out.writeGenericValue(values); + out.writeOptionalString(minimumShouldMatchField); + out.writeOptionalWriteable(minimumShouldMatchScript); + } + + public List<?> getValues() { + return values; + } + + public String getMinimumShouldMatchField() { + return minimumShouldMatchField; + } + + /** + * Sets the field that holds the number of terms that must match for each document. + * + * @throws IllegalArgumentException if a minimum should match script has already been set + */ + public TermsSetQueryBuilder setMinimumShouldMatchField(String minimumShouldMatchField) { + if (minimumShouldMatchScript != null) { + throw new IllegalArgumentException("A script has already been specified. Cannot specify both a field and script"); + } + this.minimumShouldMatchField = minimumShouldMatchField; + return this; + } + + public Script getMinimumShouldMatchScript() { + return minimumShouldMatchScript; + } + + /** + * Sets the script that computes the number of terms that must match for each document. + * + * @throws IllegalArgumentException if a minimum should match field has already been set + */ + public TermsSetQueryBuilder setMinimumShouldMatchScript(Script minimumShouldMatchScript) { + if (minimumShouldMatchField != null) { + throw new IllegalArgumentException("A field has already been specified. 
Cannot specify both a field and script"); + } + this.minimumShouldMatchScript = minimumShouldMatchScript; + return this; + } + + /** + * Compares this builder's field name, terms and minimum should match settings with those of {@code other}. + */ + @Override + protected boolean doEquals(TermsSetQueryBuilder other) { + return Objects.equals(fieldName, other.fieldName) && Objects.equals(values, other.values) && + Objects.equals(minimumShouldMatchField, other.minimumShouldMatchField) && + Objects.equals(minimumShouldMatchScript, other.minimumShouldMatchScript); + } + + @Override + protected int doHashCode() { + return Objects.hash(fieldName, values, minimumShouldMatchField, minimumShouldMatchScript); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + protected void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.startObject(fieldName); + builder.field(TERMS_FIELD.getPreferredName(), TermsQueryBuilder.convertBack(values)); + if (minimumShouldMatchField != null) { + builder.field(MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatchField); + } + if (minimumShouldMatchScript != null) { + builder.field(MINIMUM_SHOULD_MATCH_SCRIPT.getPreferredName(), minimumShouldMatchScript); + } + printBoostAndQueryName(builder); + builder.endObject(); + builder.endObject(); + } + + /** + * Parses the body of a {@code terms_set} query: a single field name whose object value holds the {@code terms} + * array, an optional {@code minimum_should_match_field} or {@code minimum_should_match_script}, and the boost + * and query name options. + * + * @throws ParsingException if an unexpected token or an unsupported field is encountered + */ + public static TermsSetQueryBuilder fromXContent(XContentParser parser) throws IOException { + XContentParser.Token token = parser.nextToken(); + if (token != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]"); + } + String currentFieldName = parser.currentName(); + String fieldName = currentFieldName; + + token = parser.nextToken(); + if (token != XContentParser.Token.START_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]"); + } + + List<?> values = new ArrayList<>(); + String minimumShouldMatchField = null; + Script minimumShouldMatchScript = null; + String queryName = 
null; + float boost = AbstractQueryBuilder.DEFAULT_BOOST; + + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_ARRAY) { + if (TERMS_FIELD.match(currentFieldName)) { + values = TermsQueryBuilder.parseValues(parser); + } else { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support [" + + currentFieldName + "]"); + } + } else if (token == XContentParser.Token.START_OBJECT) { + if (MINIMUM_SHOULD_MATCH_SCRIPT.match(currentFieldName)) { + minimumShouldMatchScript = Script.parse(parser); + } else { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support [" + + currentFieldName + "]"); + } + } else if (token.isValue()) { + if (MINIMUM_SHOULD_MATCH_FIELD.match(currentFieldName)) { + minimumShouldMatchField = parser.text(); + } else if (AbstractQueryBuilder.BOOST_FIELD.match(currentFieldName)) { + boost = parser.floatValue(); + } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) { + queryName = parser.text(); + } else { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support [" + + currentFieldName + "]"); + } + } else { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + + "] after [" + currentFieldName + "]"); + } + } + + token = parser.nextToken(); + if (token != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]"); + } + + TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(fieldName, values) + .queryName(queryName).boost(boost); + if (minimumShouldMatchField != null) { + queryBuilder.setMinimumShouldMatchField(minimumShouldMatchField); + } + if (minimumShouldMatchScript != null) { + 
queryBuilder.setMinimumShouldMatchScript(minimumShouldMatchScript); + } + return queryBuilder; + } + + /** + * Builds a {@link CoveringQuery} with one term query per value, or a match-no-docs query when no terms were + * supplied. + * + * @throws BooleanQuery.TooManyClauses if there are more terms than {@link BooleanQuery#getMaxClauseCount()} + * @throws QueryShardException if the minimum should match field cannot be found + * @throws IllegalStateException if terms were supplied but neither a minimum should match field nor script was set + */ + @Override + protected Query doToQuery(QueryShardContext context) throws IOException { + if (values.isEmpty()) { + return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query."); + } + // Fail before we attempt to create the term queries: + if (values.size() > BooleanQuery.getMaxClauseCount()) { + throw new BooleanQuery.TooManyClauses(); + } + + final MappedFieldType fieldType = context.fieldMapper(fieldName); + final List<Query> queries = new ArrayList<>(values.size()); + for (Object value : values) { + if (fieldType != null) { + queries.add(fieldType.termQuery(value, context)); + } else { + queries.add(new TermQuery(new Term(fieldName, BytesRefs.toBytesRef(value)))); + } + } + final LongValuesSource longValuesSource; + if (minimumShouldMatchField != null) { + MappedFieldType msmFieldType = context.fieldMapper(minimumShouldMatchField); + if (msmFieldType == null) { + throw new QueryShardException(context, "failed to find minimum_should_match field [" + minimumShouldMatchField + "]"); + } + + IndexNumericFieldData fieldData = context.getForField(msmFieldType); + longValuesSource = new FieldValuesSource(fieldData); + } else if (minimumShouldMatchScript != null) { + SearchScript.Factory factory = context.getScriptService().compile(minimumShouldMatchScript, SearchScript.CONTEXT); + Map<String, Object> params = new HashMap<>(); + params.putAll(minimumShouldMatchScript.getParams()); + params.put("num_terms", queries.size()); + SearchScript.LeafFactory leafFactory = factory.newFactory(params, context.lookup()); + longValuesSource = new ScriptLongValueSource(minimumShouldMatchScript, leafFactory); + } else { + throw new IllegalStateException("No minimum should match has been specified"); + } + return new CoveringQuery(queries, longValuesSource); + } + + /** + * A {@link LongValuesSource} whose per-document value is produced by running the minimum should match script. + */ + static final class ScriptLongValueSource extends LongValuesSource { + + private final Script script; + private final 
SearchScript.LeafFactory leafFactory; + + ScriptLongValueSource(Script script, SearchScript.LeafFactory leafFactory) { + this.script = script; + this.leafFactory = leafFactory; + } + + @Override + public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + SearchScript searchScript = leafFactory.newInstance(ctx); + return new LongValues() { + @Override + public long longValue() throws IOException { + return searchScript.runAsLong(); + } + + @Override + public boolean advanceExact(int doc) throws IOException { + searchScript.setDocument(doc); + // NOTE(review): run() here and runAsLong() in longValue() look like two evaluations of the script + // per document - confirm this is intended. + return searchScript.run() != null; + } + }; + } + + @Override + public boolean needsScores() { + return false; + } + + @Override + public int hashCode() { + // CoveringQuery with this field value source cannot be cachable + return System.identityHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return this == obj; + } + + @Override + public String toString() { + return "script(" + script.toString() + ")"; + } + + } + + // Forked from LongValuesSource.FieldValuesSource and changed getValues() method to always use sorted numeric + // doc values, because that is what is being used in NumberFieldMapper. 
+ static class FieldValuesSource extends LongValuesSource { + + private final IndexNumericFieldData field; + + FieldValuesSource(IndexNumericFieldData field) { + this.field = field; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldValuesSource that = (FieldValuesSource) o; + return Objects.equals(field, that.field); + } + + @Override + public String toString() { + return "long(" + field + ")"; + } + + @Override + public int hashCode() { + return Objects.hash(field); + } + + /** + * Returns values that expose the first sorted numeric doc value of each document that has one. + */ + @Override + public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + SortedNumericDocValues values = field.load(ctx).getLongValues(); + return new LongValues() { + + long current = -1; + + @Override + public long longValue() throws IOException { + return current; + } + + @Override + public boolean advanceExact(int doc) throws IOException { + boolean hasValue = values.advanceExact(doc); + if (hasValue) { + assert values.docValueCount() == 1; + current = values.nextValue(); + return true; + } else { + return false; + } + } + }; + } + + @Override + public boolean needsScores() { + return false; + } + } + +} diff --git a/core/src/main/java/org/elasticsearch/search/SearchModule.java b/core/src/main/java/org/elasticsearch/search/SearchModule.java index 7f47d11403a..53f8840f8bf 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/core/src/main/java/org/elasticsearch/search/SearchModule.java @@ -69,6 +69,7 @@ import org.elasticsearch.index.query.SpanTermQueryBuilder; import org.elasticsearch.index.query.SpanWithinQueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermsQueryBuilder; +import org.elasticsearch.index.query.TermsSetQueryBuilder; import org.elasticsearch.index.query.TypeQueryBuilder; import org.elasticsearch.index.query.WildcardQueryBuilder; import 
org.elasticsearch.index.query.WrapperQueryBuilder; @@ -748,6 +749,7 @@ public class SearchModule { registerQuery(new QuerySpec<>(GeoPolygonQueryBuilder.NAME, GeoPolygonQueryBuilder::new, GeoPolygonQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(ExistsQueryBuilder.NAME, ExistsQueryBuilder::new, ExistsQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(MatchNoneQueryBuilder.NAME, MatchNoneQueryBuilder::new, MatchNoneQueryBuilder::fromXContent)); + registerQuery(new QuerySpec<>(TermsSetQueryBuilder.NAME, TermsSetQueryBuilder::new, TermsSetQueryBuilder::fromXContent)); if (ShapesAvailability.JTS_AVAILABLE && ShapesAvailability.SPATIAL4J_AVAILABLE) { registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent)); diff --git a/core/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java new file mode 100644 index 00000000000..f3226acc2ea --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.search.CoveringQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; +import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.index.fielddata.ScriptDocValues; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.script.MockScriptEngine; +import org.elasticsearch.script.MockScriptPlugin; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptType; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.test.AbstractQueryTestCase; +import org.elasticsearch.test.rest.yaml.ObjectPath; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Function; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static 
org.hamcrest.Matchers.instanceOf; + +/** + * Tests for {@link TermsSetQueryBuilder}, covering both the minimum should match field and script variants. + */ +public class TermsSetQueryBuilderTests extends AbstractQueryTestCase<TermsSetQueryBuilder> { + + @Override + protected Collection<Class<? extends Plugin>> getPlugins() { + return Collections.singleton(CustomScriptPlugin.class); + } + + @Override + protected void initializeAdditionalMappings(MapperService mapperService) throws IOException { + String docType = "doc"; + mapperService.merge(docType, new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef(docType, + "m_s_m", "type=long" + ).string()), MapperService.MergeReason.MAPPING_UPDATE, false); + } + + @Override + protected TermsSetQueryBuilder doCreateTestQueryBuilder() { + String fieldName; + do { + fieldName = randomFrom(MAPPED_FIELD_NAMES); + } while (fieldName.equals(GEO_POINT_FIELD_NAME) || fieldName.equals(GEO_SHAPE_FIELD_NAME)); + int numValues = randomIntBetween(0, 10); + List<Object> randomTerms = new ArrayList<>(numValues); + for (int i = 0; i < numValues; i++) { + randomTerms.add(getRandomValueForFieldName(fieldName)); + } + // NOTE(review): the terms are generated for the randomly chosen field, but the builder is created on + // STRING_FIELD_NAME - confirm this is intended. + TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(STRING_FIELD_NAME, randomTerms); + if (randomBoolean()) { + queryBuilder.setMinimumShouldMatchField("m_s_m"); + } else { + queryBuilder.setMinimumShouldMatchScript( + new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_script", Collections.emptyMap())); + } + return queryBuilder; + } + + @Override + protected void doAssertLuceneQuery(TermsSetQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException { + if (queryBuilder.getValues().isEmpty()) { + assertThat(query, instanceOf(MatchNoDocsQuery.class)); + MatchNoDocsQuery matchNoDocsQuery = (MatchNoDocsQuery) query; + assertThat(matchNoDocsQuery.toString(), containsString("No terms supplied for \"terms_set\" query.")); + } else { + assertThat(query, instanceOf(CoveringQuery.class)); + } + } + + @Override + protected boolean isCachable(TermsSetQueryBuilder queryBuilder) { + return queryBuilder.getMinimumShouldMatchField() != null || + 
(queryBuilder.getMinimumShouldMatchScript() != null && queryBuilder.getValues().isEmpty()); + } + + @Override + protected boolean builderGeneratesCacheableQueries() { + return false; + } + + /** + * Setting a field after a script, or a script after a field, must be rejected. + */ + public void testBothFieldAndScriptSpecified() { + TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder("_field", Collections.emptyList()); + queryBuilder.setMinimumShouldMatchScript(new Script("")); + expectThrows(IllegalArgumentException.class, () -> queryBuilder.setMinimumShouldMatchField("_field")); + + queryBuilder.setMinimumShouldMatchScript(null); + queryBuilder.setMinimumShouldMatchField("_field"); + expectThrows(IllegalArgumentException.class, () -> queryBuilder.setMinimumShouldMatchScript(new Script(""))); + } + + /** + * Searches for the terms "c" and "d" with the required count read from the m_s_m field; only the documents that + * contain at least m_s_m of those terms (docs 1, 3 and 4) are expected to match. + */ + public void testDoToQuery() throws Exception { + try (Directory directory = newDirectory()) { + IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer()); + config.setMergePolicy(NoMergePolicy.INSTANCE); + try (IndexWriter iw = new IndexWriter(directory, config)) { + Document document = new Document(); + document.add(new TextField("message", "a b", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 1)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 1)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 2)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c d", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 1)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c d", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 2)); + iw.addDocument(document); + + document = new Document(); + 
document.add(new TextField("message", "a b c d", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 3)); + iw.addDocument(document); + } + + try (IndexReader ir = DirectoryReader.open(directory)) { + QueryShardContext context = createShardContext(); + Query query = new TermsSetQueryBuilder("message", Arrays.asList("c", "d")) + .setMinimumShouldMatchField("m_s_m").doToQuery(context); + IndexSearcher searcher = new IndexSearcher(ir); + TopDocs topDocs = searcher.search(query, 10, new Sort(SortField.FIELD_DOC)); + assertThat(topDocs.totalHits, equalTo(3L)); + assertThat(topDocs.scoreDocs[0].doc, equalTo(1)); + assertThat(topDocs.scoreDocs[1].doc, equalTo(3)); + assertThat(topDocs.scoreDocs[2].doc, equalTo(4)); + } + } + } + + /** + * Searches for the terms "a", "b", "c" and "d" with the required count computed by the mock script as + * num_terms * (m_s_m / 100); docs 0, 2 and 4 are expected to match. + */ + public void testDoToQuery_msmScriptField() throws Exception { + try (Directory directory = newDirectory()) { + IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer()); + config.setMergePolicy(NoMergePolicy.INSTANCE); + try (IndexWriter iw = new IndexWriter(directory, config)) { + Document document = new Document(); + document.add(new TextField("message", "a b x y", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 50)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b x y", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 75)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c x", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 75)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c x", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 100)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c d", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 100)); + 
iw.addDocument(document); + } + + try (IndexReader ir = DirectoryReader.open(directory)) { + QueryShardContext context = createShardContext(); + Script script = new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_script", Collections.emptyMap()); + Query query = new TermsSetQueryBuilder("message", Arrays.asList("a", "b", "c", "d")) + .setMinimumShouldMatchScript(script).doToQuery(context); + IndexSearcher searcher = new IndexSearcher(ir); + TopDocs topDocs = searcher.search(query, 10, new Sort(SortField.FIELD_DOC)); + assertThat(topDocs.totalHits, equalTo(3L)); + assertThat(topDocs.scoreDocs[0].doc, equalTo(0)); + assertThat(topDocs.scoreDocs[1].doc, equalTo(2)); + assertThat(topDocs.scoreDocs[2].doc, equalTo(4)); + } + } + } + + /** + * Registers the mock script "_script", which returns num_terms scaled by the m_s_m doc value as a percentage. + */ + public static class CustomScriptPlugin extends MockScriptPlugin { + + @Override + protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() { + return Collections.singletonMap("_script", args -> { + try { + int clauseCount = ObjectPath.evaluate(args, "params.num_terms"); + long msm = ((ScriptDocValues.Longs) ObjectPath.evaluate(args, "doc.m_s_m")).getValue(); + return clauseCount * (msm / 100d); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + } + +} + diff --git a/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java index 4988d75ca41..fccec4ed468 100644 --- a/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java +++ b/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java @@ -323,6 +323,7 @@ public class SearchModuleTests extends ModuleTestCase { "span_within", "term", "terms", + "terms_set", "type", "wildcard", "wrapper" diff --git a/docs/reference/query-dsl/term-level-queries.asciidoc b/docs/reference/query-dsl/term-level-queries.asciidoc index a6aae489668..883fd4c36b5 100644 --- a/docs/reference/query-dsl/term-level-queries.asciidoc +++ b/docs/reference/query-dsl/term-level-queries.asciidoc @@ -21,6 +21,12 @@ The 
queries in this group are: Find documents which contain any of the exact terms specified in the field specified. +<<query-dsl-terms-set-query,`terms_set` query>>:: + + Find documents which match one or more of the specified terms. The + number of terms that must match depends on the specified minimum should + match field or script. + <<query-dsl-range-query,`range` query>>:: Find documents where the field specified contains values (dates, numbers, @@ -66,6 +72,8 @@ include::term-query.asciidoc[] include::terms-query.asciidoc[] +include::terms-set-query.asciidoc[] + include::range-query.asciidoc[] include::exists-query.asciidoc[] diff --git a/docs/reference/query-dsl/terms-set-query.asciidoc b/docs/reference/query-dsl/terms-set-query.asciidoc new file mode 100644 index 00000000000..659f840cccb --- /dev/null +++ b/docs/reference/query-dsl/terms-set-query.asciidoc @@ -0,0 +1,122 @@ +[[query-dsl-terms-set-query]] +=== Terms Set Query + +experimental[The terms_set query is a new query and its syntax may change in the future] + +Returns any documents that match one or more of the +provided terms. The terms are not analyzed and thus must match exactly. +The number of terms that must match varies per document and is either +controlled by a minimum should match field or computed per document in +a minimum should match script. 
+ +The field that controls the number of required terms that must match must +be a numeric field: + +[source,js] +-------------------------------------------------- +PUT /my-index +{ + "mappings": { + "doc": { + "properties": { + "required_matches": { + "type": "long" + } + } + } + } +} + +PUT /my-index/doc/1?refresh +{ + "codes": ["ghi", "jkl"], + "required_matches": 2 +} + +PUT /my-index/doc/2?refresh +{ + "codes": ["def", "ghi"], + "required_matches": 2 +} +-------------------------------------------------- +// CONSOLE +// TESTSETUP + +An example that uses the minimum should match field: + +[source,js] +-------------------------------------------------- +GET /my-index/_search +{ + "query": { + "terms_set": { + "codes" : { + "terms" : ["abc", "def", "ghi"], + "minimum_should_match_field": "required_matches" + } + } + } +} +-------------------------------------------------- +// CONSOLE + +Response: + +[source,js] +-------------------------------------------------- +{ + "took": 13, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped" : 0, + "failed": 0 + }, + "hits": { + "total": 1, + "max_score": 0.5753642, + "hits": [ + { + "_index": "my-index", + "_type": "doc", + "_id": "2", + "_score": 0.5753642, + "_source": { + "codes": ["def", "ghi"], + "required_matches": 2 + } + } + ] + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"took": 13,/"took": "$body.took",/] + +Scripts can also be used to control how many terms are required to match +in a more dynamic way. For example, a creation date or a popularity field +can be used as the basis for the number of required terms to match. + +The `params.num_terms` parameter is also available in the script to indicate the +number of terms that have been specified. 
+ +An example that caps the number of required matching terms so that it never +exceeds the number of terms specified: + +[source,js] +-------------------------------------------------- +GET /my-index/_search +{ + "query": { + "terms_set": { + "codes" : { + "terms" : ["abc", "def", "ghi"], + "minimum_should_match_script": { + "source": "Math.min(params.num_terms, doc['required_matches'].value)" + } + } + } + } +} +-------------------------------------------------- +// CONSOLE