Added new terms_set query

This query returns documents that match one or more
of the provided terms. The number of terms that must match varies
per document and is either controlled by a minimum should match
field or computed per document in a minimum should match script.

Closes #26915
This commit is contained in:
Martijn van Groningen 2017-10-27 13:14:44 +02:00
parent 354862c26e
commit d805c41b28
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
7 changed files with 751 additions and 1 deletions

View File

@ -391,7 +391,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
.queryName(queryName);
}
private static List<Object> parseValues(XContentParser parser) throws IOException {
static List<Object> parseValues(XContentParser parser) throws IOException {
List<Object> values = new ArrayList<>();
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
Object value = parser.objectBytes();

View File

@ -0,0 +1,369 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.SearchScript;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
 * Query builder for the {@code terms_set} query.
 * <p>
 * Each supplied value is turned into a term query against {@link #fieldName}, and the resulting
 * clauses are wrapped in a {@link CoveringQuery}. The number of clauses a document has to match is
 * resolved per document, either from a numeric field ({@link #setMinimumShouldMatchField(String)})
 * or from a script ({@link #setMinimumShouldMatchScript(Script)}). The two options are mutually
 * exclusive and exactly one of them must be set before the query is built.
 */
public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQueryBuilder> {

    public static final String NAME = "terms_set";

    static final ParseField TERMS_FIELD = new ParseField("terms");
    static final ParseField MINIMUM_SHOULD_MATCH_FIELD = new ParseField("minimum_should_match_field");
    static final ParseField MINIMUM_SHOULD_MATCH_SCRIPT = new ParseField("minimum_should_match_script");

    private final String fieldName;
    private final List<?> values;

    // At most one of the following two is non-null; enforced by the setters.
    private String minimumShouldMatchField;
    private Script minimumShouldMatchScript;

    /**
     * Creates a builder for the given field and terms.
     *
     * @param fieldName the field the terms are matched against, must not be {@code null}
     * @param values    the terms to match, must not be {@code null}; passed through
     *                  {@link TermsQueryBuilder#convert} before being stored
     * @throws NullPointerException if either argument is {@code null}
     */
    public TermsSetQueryBuilder(String fieldName, List<?> values) {
        this.fieldName = Objects.requireNonNull(fieldName);
        this.values = TermsQueryBuilder.convert(Objects.requireNonNull(values));
    }

    /**
     * Reads a builder from the stream. The read order mirrors {@link #doWriteTo(StreamOutput)}.
     */
    public TermsSetQueryBuilder(StreamInput in) throws IOException {
        super(in);
        this.fieldName = in.readString();
        this.values = (List<?>) in.readGenericValue();
        this.minimumShouldMatchField = in.readOptionalString();
        this.minimumShouldMatchScript = in.readOptionalWriteable(Script::new);
    }

    @Override
    protected void doWriteTo(StreamOutput out) throws IOException {
        out.writeString(fieldName);
        out.writeGenericValue(values);
        out.writeOptionalString(minimumShouldMatchField);
        out.writeOptionalWriteable(minimumShouldMatchScript);
    }

    /**
     * @return the terms as stored internally (after {@link TermsQueryBuilder#convert})
     */
    public List<?> getValues() {
        return values;
    }

    /**
     * @return the name of the field holding the required number of matches, or {@code null} if unset
     */
    public String getMinimumShouldMatchField() {
        return minimumShouldMatchField;
    }

    /**
     * Sets the field that holds, per document, the number of terms that must match.
     *
     * @throws IllegalArgumentException if a minimum should match script has already been set
     */
    public TermsSetQueryBuilder setMinimumShouldMatchField(String minimumShouldMatchField) {
        if (minimumShouldMatchScript != null) {
            throw new IllegalArgumentException("A script has already been specified. Cannot specify both a field and script");
        }
        this.minimumShouldMatchField = minimumShouldMatchField;
        return this;
    }

    /**
     * @return the script computing the required number of matches, or {@code null} if unset
     */
    public Script getMinimumShouldMatchScript() {
        return minimumShouldMatchScript;
    }

    /**
     * Sets the script that computes, per document, the number of terms that must match.
     *
     * @throws IllegalArgumentException if a minimum should match field has already been set
     */
    public TermsSetQueryBuilder setMinimumShouldMatchScript(Script minimumShouldMatchScript) {
        if (minimumShouldMatchField != null) {
            throw new IllegalArgumentException("A field has already been specified. Cannot specify both a field and script");
        }
        this.minimumShouldMatchScript = minimumShouldMatchScript;
        return this;
    }

    /**
     * Compares the query specific state of this builder with {@code other}.
     * Every property must be compared against the corresponding property of {@code other};
     * comparing a field with itself would make all builders look equal.
     */
    @Override
    protected boolean doEquals(TermsSetQueryBuilder other) {
        return Objects.equals(fieldName, other.fieldName) && Objects.equals(values, other.values) &&
                Objects.equals(minimumShouldMatchField, other.minimumShouldMatchField) &&
                Objects.equals(minimumShouldMatchScript, other.minimumShouldMatchScript);
    }

    @Override
    protected int doHashCode() {
        return Objects.hash(fieldName, values, minimumShouldMatchField, minimumShouldMatchScript);
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }

    /**
     * Renders the query as {@code {"terms_set": {"<field>": {"terms": [...], ...}}}}.
     * Only the minimum should match option that is set is written.
     */
    @Override
    protected void doXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(NAME);
        builder.startObject(fieldName);
        builder.field(TERMS_FIELD.getPreferredName(), TermsQueryBuilder.convertBack(values));
        if (minimumShouldMatchField != null) {
            builder.field(MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatchField);
        }
        if (minimumShouldMatchScript != null) {
            builder.field(MINIMUM_SHOULD_MATCH_SCRIPT.getPreferredName(), minimumShouldMatchScript);
        }
        printBoostAndQueryName(builder);
        builder.endObject();
        builder.endObject();
    }

    /**
     * Parses a {@code terms_set} query. The next token of the parser is expected to be the name of
     * the field to query, followed by an object holding the terms array, one of the minimum should
     * match options and optionally boost and query name.
     *
     * @throws ParsingException on an unexpected token or an unsupported option
     */
    public static TermsSetQueryBuilder fromXContent(XContentParser parser) throws IOException {
        XContentParser.Token token = parser.nextToken();
        if (token != XContentParser.Token.FIELD_NAME) {
            throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
        }
        String currentFieldName = parser.currentName();
        String fieldName = currentFieldName;

        token = parser.nextToken();
        if (token != XContentParser.Token.START_OBJECT) {
            throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
        }

        List<Object> values = new ArrayList<>();
        String minimumShouldMatchField = null;
        Script minimumShouldMatchScript = null;
        String queryName = null;
        float boost = AbstractQueryBuilder.DEFAULT_BOOST;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token == XContentParser.Token.START_ARRAY) {
                if (TERMS_FIELD.match(currentFieldName)) {
                    values = TermsQueryBuilder.parseValues(parser);
                } else {
                    throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
                            + currentFieldName + "]");
                }
            } else if (token == XContentParser.Token.START_OBJECT) {
                if (MINIMUM_SHOULD_MATCH_SCRIPT.match(currentFieldName)) {
                    minimumShouldMatchScript = Script.parse(parser);
                } else {
                    throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
                            + currentFieldName + "]");
                }
            } else if (token.isValue()) {
                if (MINIMUM_SHOULD_MATCH_FIELD.match(currentFieldName)) {
                    minimumShouldMatchField = parser.text();
                } else if (AbstractQueryBuilder.BOOST_FIELD.match(currentFieldName)) {
                    boost = parser.floatValue();
                } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
                    queryName = parser.text();
                } else {
                    throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
                            + currentFieldName + "]");
                }
            } else {
                throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token +
                        "] after [" + currentFieldName + "]");
            }
        }

        // The loop above consumed the end of the per-field object; the enclosing object must end here too.
        token = parser.nextToken();
        if (token != XContentParser.Token.END_OBJECT) {
            throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
        }

        TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(fieldName, values)
                .queryName(queryName).boost(boost);
        // Going through the setters rejects a request that specifies both a field and a script.
        if (minimumShouldMatchField != null) {
            queryBuilder.setMinimumShouldMatchField(minimumShouldMatchField);
        }
        if (minimumShouldMatchScript != null) {
            queryBuilder.setMinimumShouldMatchScript(minimumShouldMatchScript);
        }
        return queryBuilder;
    }

    /**
     * Builds the Lucene query: one term query per value, combined in a {@link CoveringQuery} whose
     * required number of matching clauses comes from the configured field or script.
     *
     * @throws BooleanQuery.TooManyClauses if more values than the max clause count were supplied
     * @throws QueryShardException         if the minimum should match field is not mapped
     * @throws IllegalStateException       if neither a field nor a script has been configured
     */
    @Override
    protected Query doToQuery(QueryShardContext context) throws IOException {
        if (values.isEmpty()) {
            return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
        }
        // Fail before we attempt to create the term queries:
        if (values.size() > BooleanQuery.getMaxClauseCount()) {
            throw new BooleanQuery.TooManyClauses();
        }

        final MappedFieldType fieldType = context.fieldMapper(fieldName);
        final List<Query> queries = new ArrayList<>(values.size());
        for (Object value : values) {
            if (fieldType != null) {
                queries.add(fieldType.termQuery(value, context));
            } else {
                // Unmapped field: fall back to a plain term query on the raw bytes of the value.
                queries.add(new TermQuery(new Term(fieldName, BytesRefs.toBytesRef(value))));
            }
        }

        final LongValuesSource longValuesSource;
        if (minimumShouldMatchField != null) {
            MappedFieldType msmFieldType = context.fieldMapper(minimumShouldMatchField);
            if (msmFieldType == null) {
                throw new QueryShardException(context, "failed to find minimum_should_match field [" + minimumShouldMatchField + "]");
            }

            IndexNumericFieldData fieldData = context.getForField(msmFieldType);
            longValuesSource = new FieldValuesSource(fieldData);
        } else if (minimumShouldMatchScript != null) {
            SearchScript.Factory factory = context.getScriptService().compile(minimumShouldMatchScript, SearchScript.CONTEXT);
            // Copy the user supplied params and expose the number of clauses to the script as "num_terms".
            Map<String, Object> params = new HashMap<>();
            params.putAll(minimumShouldMatchScript.getParams());
            params.put("num_terms", queries.size());
            SearchScript.LeafFactory leafFactory = factory.newFactory(params, context.lookup());
            longValuesSource = new ScriptLongValueSource(minimumShouldMatchScript, leafFactory);
        } else {
            throw new IllegalStateException("No minimum should match has been specified");
        }
        return new CoveringQuery(queries, longValuesSource);
    }

    /**
     * A {@link LongValuesSource} whose per-document value is the result of running a script.
     */
    static final class ScriptLongValueSource extends LongValuesSource {

        private final Script script;
        private final SearchScript.LeafFactory leafFactory;

        ScriptLongValueSource(Script script, SearchScript.LeafFactory leafFactory) {
            this.script = script;
            this.leafFactory = leafFactory;
        }

        @Override
        public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
            SearchScript searchScript = leafFactory.newInstance(ctx);
            return new LongValues() {
                @Override
                public long longValue() throws IOException {
                    return searchScript.runAsLong();
                }

                @Override
                public boolean advanceExact(int doc) throws IOException {
                    searchScript.setDocument(doc);
                    // NOTE(review): the script is run here to test for a value and presumably again
                    // in longValue(); confirm whether the double execution is acceptable.
                    return searchScript.run() != null;
                }
            };
        }

        @Override
        public boolean needsScores() {
            return false;
        }

        @Override
        public int hashCode() {
            // Identity based on purpose: a CoveringQuery using this value source must not be cacheable
            return System.identityHashCode(this);
        }

        @Override
        public boolean equals(Object obj) {
            return this == obj;
        }

        @Override
        public String toString() {
            return "script(" + script.toString() + ")";
        }

    }

    // Forked from LongValuesSource.FieldValuesSource and changed getValues() method to always use sorted numeric
    // doc values, because that is what is being used in NumberFieldMapper.
    static class FieldValuesSource extends LongValuesSource {

        private final IndexNumericFieldData field;

        FieldValuesSource(IndexNumericFieldData field) {
            this.field = field;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            FieldValuesSource that = (FieldValuesSource) o;
            return Objects.equals(field, that.field);
        }

        @Override
        public String toString() {
            return "long(" + field + ")";
        }

        @Override
        public int hashCode() {
            return Objects.hash(field);
        }

        @Override
        public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
            SortedNumericDocValues values = field.load(ctx).getLongValues();
            return new LongValues() {

                long current = -1;

                @Override
                public long longValue() throws IOException {
                    return current;
                }

                @Override
                public boolean advanceExact(int doc) throws IOException {
                    boolean hasValue = values.advanceExact(doc);
                    if (hasValue) {
                        // Single valued fields are expected; this is only checked when assertions are
                        // enabled, otherwise the first value returned for the document is used.
                        assert values.docValueCount() == 1;
                        current = values.nextValue();
                        return true;
                    } else {
                        return false;
                    }
                }
            };
        }

        @Override
        public boolean needsScores() {
            return false;
        }
    }

}

View File

@ -69,6 +69,7 @@ import org.elasticsearch.index.query.SpanTermQueryBuilder;
import org.elasticsearch.index.query.SpanWithinQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.query.TermsQueryBuilder;
import org.elasticsearch.index.query.TermsSetQueryBuilder;
import org.elasticsearch.index.query.TypeQueryBuilder;
import org.elasticsearch.index.query.WildcardQueryBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder;
@ -748,6 +749,7 @@ public class SearchModule {
registerQuery(new QuerySpec<>(GeoPolygonQueryBuilder.NAME, GeoPolygonQueryBuilder::new, GeoPolygonQueryBuilder::fromXContent));
registerQuery(new QuerySpec<>(ExistsQueryBuilder.NAME, ExistsQueryBuilder::new, ExistsQueryBuilder::fromXContent));
registerQuery(new QuerySpec<>(MatchNoneQueryBuilder.NAME, MatchNoneQueryBuilder::new, MatchNoneQueryBuilder::fromXContent));
registerQuery(new QuerySpec<>(TermsSetQueryBuilder.NAME, TermsSetQueryBuilder::new, TermsSetQueryBuilder::fromXContent));
if (ShapesAvailability.JTS_AVAILABLE && ShapesAvailability.SPATIAL4J_AVAILABLE) {
registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent));

View File

@ -0,0 +1,248 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.script.MockScriptEngine;
import org.elasticsearch.script.MockScriptPlugin;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.test.AbstractQueryTestCase;
import org.elasticsearch.test.rest.yaml.ObjectPath;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
/**
 * Tests for {@link TermsSetQueryBuilder}. Covers the randomized builder round trips inherited from
 * {@link AbstractQueryTestCase} as well as the produced Lucene query, executed against a small
 * in-memory index with both the field based and the script based minimum should match.
 */
public class TermsSetQueryBuilderTests extends AbstractQueryTestCase<TermsSetQueryBuilder> {

    @Override
    protected Collection<Class<? extends Plugin>> getPlugins() {
        return Collections.singleton(CustomScriptPlugin.class);
    }

    /** Registers the long field {@code m_s_m} that is used as minimum should match field. */
    @Override
    protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
        final String type = "doc";
        final String mapping = PutMappingRequest.buildFromSimplifiedDef(type, "m_s_m", "type=long").string();
        mapperService.merge(type, new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE, false);
    }

    @Override
    protected TermsSetQueryBuilder doCreateTestQueryBuilder() {
        // Draw field names until a non-geo field comes up; its value generator supplies the random terms.
        String fieldName = randomFrom(MAPPED_FIELD_NAMES);
        while (fieldName.equals(GEO_POINT_FIELD_NAME) || fieldName.equals(GEO_SHAPE_FIELD_NAME)) {
            fieldName = randomFrom(MAPPED_FIELD_NAMES);
        }

        final int termCount = randomIntBetween(0, 10);
        final List<Object> terms = new ArrayList<>(termCount);
        while (terms.size() < termCount) {
            terms.add(getRandomValueForFieldName(fieldName));
        }

        final TermsSetQueryBuilder builder = new TermsSetQueryBuilder(STRING_FIELD_NAME, terms);
        if (randomBoolean()) {
            return builder.setMinimumShouldMatchField("m_s_m");
        }
        return builder.setMinimumShouldMatchScript(
                new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_script", Collections.emptyMap()));
    }

    @Override
    protected void doAssertLuceneQuery(TermsSetQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException {
        if (queryBuilder.getValues().isEmpty() == false) {
            assertThat(query, instanceOf(CoveringQuery.class));
            return;
        }
        // Without terms the builder short-circuits to a match-no-docs query that carries a reason.
        assertThat(query, instanceOf(MatchNoDocsQuery.class));
        final MatchNoDocsQuery noDocs = (MatchNoDocsQuery) query;
        assertThat(noDocs.toString(), containsString("No terms supplied for \"terms_set\" query."));
    }

    @Override
    protected boolean isCachable(TermsSetQueryBuilder queryBuilder) {
        if (queryBuilder.getMinimumShouldMatchField() != null) {
            return true;
        }
        // A script based query is only expected to be cachable when no terms were supplied.
        return queryBuilder.getMinimumShouldMatchScript() != null && queryBuilder.getValues().isEmpty();
    }

    @Override
    protected boolean builderGeneratesCacheableQueries() {
        return false;
    }

    /** Field and script are mutually exclusive, whichever one is set first. */
    public void testBothFieldAndScriptSpecified() {
        final TermsSetQueryBuilder builder = new TermsSetQueryBuilder("_field", Collections.emptyList());

        builder.setMinimumShouldMatchScript(new Script(""));
        expectThrows(IllegalArgumentException.class, () -> builder.setMinimumShouldMatchField("_field"));

        builder.setMinimumShouldMatchScript(null);
        builder.setMinimumShouldMatchField("_field");
        expectThrows(IllegalArgumentException.class, () -> builder.setMinimumShouldMatchScript(new Script("")));
    }

    /**
     * Searches for the terms {@code c} and {@code d}; a document is expected to match when it
     * contains at least as many of them as its {@code m_s_m} value.
     */
    public void testDoToQuery() throws Exception {
        try (Directory directory = newDirectory()) {
            final IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
            writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
            try (IndexWriter writer = new IndexWriter(directory, writerConfig)) {
                // Insertion order determines the doc ids asserted below.
                indexDocument(writer, "a b", 1);
                indexDocument(writer, "a b c", 1);
                indexDocument(writer, "a b c", 2);
                indexDocument(writer, "a b c d", 1);
                indexDocument(writer, "a b c d", 2);
                indexDocument(writer, "a b c d", 3);
            }

            try (IndexReader reader = DirectoryReader.open(directory)) {
                final QueryShardContext shardContext = createShardContext();
                final TermsSetQueryBuilder builder = new TermsSetQueryBuilder("message", Arrays.asList("c", "d"));
                final Query query = builder.setMinimumShouldMatchField("m_s_m").doToQuery(shardContext);
                final IndexSearcher indexSearcher = new IndexSearcher(reader);
                final TopDocs hits = indexSearcher.search(query, 10, new Sort(SortField.FIELD_DOC));

                assertThat(hits.totalHits, equalTo(3L));
                final int[] expectedDocIds = {1, 3, 4};
                for (int i = 0; i < expectedDocIds.length; i++) {
                    assertThat(hits.scoreDocs[i].doc, equalTo(expectedDocIds[i]));
                }
            }
        }
    }

    /**
     * Searches for the terms {@code a b c d} with the mock script, which interprets {@code m_s_m}
     * as the percentage of the supplied terms that has to match.
     */
    public void testDoToQuery_msmScriptField() throws Exception {
        try (Directory directory = newDirectory()) {
            final IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
            writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
            try (IndexWriter writer = new IndexWriter(directory, writerConfig)) {
                // Insertion order determines the doc ids asserted below.
                indexDocument(writer, "a b x y", 50);
                indexDocument(writer, "a b x y", 75);
                indexDocument(writer, "a b c x", 75);
                indexDocument(writer, "a b c x", 100);
                indexDocument(writer, "a b c d", 100);
            }

            try (IndexReader reader = DirectoryReader.open(directory)) {
                final QueryShardContext shardContext = createShardContext();
                final Script msmScript = new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_script", Collections.emptyMap());
                final TermsSetQueryBuilder builder = new TermsSetQueryBuilder("message", Arrays.asList("a", "b", "c", "d"));
                final Query query = builder.setMinimumShouldMatchScript(msmScript).doToQuery(shardContext);
                final IndexSearcher indexSearcher = new IndexSearcher(reader);
                final TopDocs hits = indexSearcher.search(query, 10, new Sort(SortField.FIELD_DOC));

                assertThat(hits.totalHits, equalTo(3L));
                final int[] expectedDocIds = {0, 2, 4};
                for (int i = 0; i < expectedDocIds.length; i++) {
                    assertThat(hits.scoreDocs[i].doc, equalTo(expectedDocIds[i]));
                }
            }
        }
    }

    /** Adds one document with an analyzed {@code message} and a numeric {@code m_s_m} doc value. */
    private static void indexDocument(IndexWriter writer, String message, long minimumShouldMatch) throws IOException {
        final Document doc = new Document();
        doc.add(new TextField("message", message, Field.Store.NO));
        doc.add(new SortedNumericDocValuesField("m_s_m", minimumShouldMatch));
        writer.addDocument(doc);
    }

    /** Provides the mock script {@code _script}: {@code num_terms * (m_s_m / 100)}. */
    public static class CustomScriptPlugin extends MockScriptPlugin {

        @Override
        protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
            final Function<Map<String, Object>, Object> percentageOfTerms = args -> {
                try {
                    int clauseCount = ObjectPath.evaluate(args, "params.num_terms");
                    ScriptDocValues.Longs msmValues = (ScriptDocValues.Longs) ObjectPath.evaluate(args, "doc.m_s_m");
                    long msm = msmValues.getValue();
                    return clauseCount * (msm / 100d);
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            };
            return Collections.singletonMap("_script", percentageOfTerms);
        }
    }

}

View File

@ -323,6 +323,7 @@ public class SearchModuleTests extends ModuleTestCase {
"span_within",
"term",
"terms",
"terms_set",
"type",
"wildcard",
"wrapper"

View File

@ -21,6 +21,12 @@ The queries in this group are:
Find documents which contain any of the exact terms specified in the field
specified.
<<query-dsl-terms-set-query,`terms_set` query>>::
Find documents which match with one or more of the specified terms. The
number of terms that must match depends on the specified minimum should
match field or script.
<<query-dsl-range-query,`range` query>>::
Find documents where the field specified contains values (dates, numbers,
@ -66,6 +72,8 @@ include::term-query.asciidoc[]
include::terms-query.asciidoc[]
include::terms-set-query.asciidoc[]
include::range-query.asciidoc[]
include::exists-query.asciidoc[]

View File

@ -0,0 +1,122 @@
[[query-dsl-terms-set-query]]
=== Terms Set Query
experimental[The terms_set query is a new query and its syntax may change in the future]
Returns any documents that match one or more of the
provided terms. The terms are not analyzed and thus must match exactly.
The number of terms that must match varies per document and is either
controlled by a minimum should match field or computed per document in
a minimum should match script.
The field that controls the number of required terms that must match must
be a number field:
[source,js]
--------------------------------------------------
PUT /my-index
{
"mappings": {
"doc": {
"properties": {
"required_matches": {
"type": "long"
}
}
}
}
}
PUT /my-index/doc/1?refresh
{
"codes": ["ghi", "jkl"],
"required_matches": 2
}
PUT /my-index/doc/2?refresh
{
"codes": ["def", "ghi"],
"required_matches": 2
}
--------------------------------------------------
// CONSOLE
// TESTSETUP
An example that uses the minimum should match field:
[source,js]
--------------------------------------------------
GET /my-index/_search
{
"query": {
"terms_set": {
"codes" : {
"terms" : ["abc", "def", "ghi"],
"minimum_should_match_field": "required_matches"
}
}
}
}
--------------------------------------------------
// CONSOLE
Response:
[source,js]
--------------------------------------------------
{
"took": 13,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped" : 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.5753642,
"hits": [
{
"_index": "my-index",
"_type": "doc",
"_id": "2",
"_score": 0.5753642,
"_source": {
"codes": ["def", "ghi"],
"required_matches": 2
}
}
]
}
}
--------------------------------------------------
// TESTRESPONSE[s/"took": 13,/"took": "$body.took",/]
Scripts can also be used to control how many terms are required to match
in a more dynamic way. For example, a creation date or a popularity field
can be used as basis for the number of required terms to match.
Also the `params.num_terms` parameter is available in the script to indicate the
number of terms that have been specified.
An example that caps the number of required terms to match so that it never
exceeds the number of terms specified:
[source,js]
--------------------------------------------------
GET /my-index/_search
{
"query": {
"terms_set": {
"codes" : {
"terms" : ["abc", "def", "ghi"],
"minimum_should_match_script": {
"source": "Math.min(params.num_terms, doc['required_matches'].value)"
}
}
}
}
}
--------------------------------------------------
// CONSOLE