Add a `feature_vector` field. (#31102)

This field is similar to the `feature` field but is better suited to index
sparse feature vectors. A use-case for this field could be to record topics
associated with every document alongside a metric that quantifies how well
the topic is connected to this document, and then boost queries based on the
topics that the logged user is interested in.

Relates #27552
This commit is contained in:
Adrien Grand 2018-06-07 10:05:37 +02:00 committed by GitHub
parent 75a676c70b
commit 458bca11bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 635 additions and 48 deletions

View File

@ -42,6 +42,8 @@ string:: <<text,`text`>> and <<keyword,`keyword`>>
<<feature>>:: Record numeric features to boost hits at query time.
<<feature-vector>>:: Record numeric feature vectors to boost hits at query time.
[float]
=== Multi-fields
@ -90,4 +92,4 @@ include::types/parent-join.asciidoc[]
include::types/feature.asciidoc[]
include::types/feature-vector.asciidoc[]

View File

@ -0,0 +1,64 @@
[[feature-vector]]
=== Feature vector datatype
A `feature_vector` field can index numeric feature vectors, so that they can
later be used to boost documents in queries with a
<<query-dsl-feature-query,`feature`>> query.
It is analogous to the <<feature,`feature`>> datatype but is better suited
when the list of features is sparse, so that it would not be reasonable to
add one field to the mappings for each of them.
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"_doc": {
"properties": {
"topics": {
"type": "feature_vector" <1>
}
}
}
}
}
PUT my_index/_doc/1
{
"topics": { <2>
"politics": 20,
"economics": 50.8
}
}
PUT my_index/_doc/2
{
"topics": {
"politics": 5.2,
"sports": 80.1
}
}
GET my_index/_search
{
"query": {
"feature": {
"field": "topics.politics"
}
}
}
--------------------------------------------------
// CONSOLE
<1> Feature vector fields must use the `feature_vector` field type
<2> Feature vector fields must be a hash with string keys and strictly positive numeric values
NOTE: `feature_vector` fields only support single-valued features and strictly
positive values. Multi-valued fields and zero or negative values will be rejected.
NOTE: `feature_vector` fields do not support sorting or aggregating and may
only be queried using <<query-dsl-feature-query,`feature`>> queries.
NOTE: `feature_vector` fields only preserve 9 significant bits for the
precision, which translates to a relative error of about 0.4%.

View File

@ -2,9 +2,10 @@
=== Feature Query
The `feature` query is a specialized query that only works on
<<feature,`feature`>> fields. Its goal is to boost the score of documents based
on the values of numeric features. It is typically put in a `should` clause of
a <<query-dsl-bool-query,`bool`>> query so that its score is added to the score
<<feature,`feature`>> fields and <<feature-vector,`feature_vector`>> fields.
Its goal is to boost the score of documents based on the values of numeric
features. It is typically put in a `should` clause of a
<<query-dsl-bool-query,`bool`>> query so that its score is added to the score
of the query.
Compared to using <<query-dsl-function-score-query,`function_score`>> or other
@ -13,7 +14,16 @@ efficiently skip non-competitive hits when
<<search-uri-request,`track_total_hits`>> is set to `false`. Speedups may be
spectacular.
Here is an example:
Here is an example that indexes various features:
- https://en.wikipedia.org/wiki/PageRank[`pagerank`], a measure of the
importance of a website,
- `url_length`, the length of the url, which typically correlates negatively
with relevance,
- `topics`, which associates a list of topics with every document alongside a
measure of how well the document is connected to this topic.
Then it includes an example query that searches for `"2016"` and boosts
based on `pagerank`, `url_length` and the `sports` topic.
[source,js]
--------------------------------------------------
@ -28,6 +38,9 @@ PUT test
"url_length": {
"type": "feature",
"positive_score_impact": false
},
"topics": {
"type": "feature_vector"
}
}
}
@ -36,32 +49,73 @@ PUT test
PUT test/_doc/1
{
"pagerank": 10,
"url_length": 50
"url": "http://en.wikipedia.org/wiki/2016_Summer_Olympics",
"content": "Rio 2016",
"pagerank": 50.3,
"url_length": 42,
"topics": {
"sports": 50,
"brazil": 30
}
}
PUT test/_doc/2
{
"pagerank": 100,
"url_length": 20
"url": "http://en.wikipedia.org/wiki/2016_Brazilian_Grand_Prix",
"content": "Formula One motor race held on 13 November 2016 at the Autódromo José Carlos Pace in São Paulo, Brazil",
"pagerank": 50.3,
"url_length": 47,
"topics": {
"sports": 35,
"formula one": 65,
"brazil": 20
}
}
PUT test/_doc/3
{
"url": "http://en.wikipedia.org/wiki/Deadpool_(film)",
"content": "Deadpool is a 2016 American superhero film",
"pagerank": 50.3,
"url_length": 37,
"topics": {
"movies": 60,
"super hero": 65
}
}
POST test/_refresh
GET test/_search
GET test/_search
{
"query": {
"feature": {
"field": "pagerank"
}
}
}
GET test/_search
{
"query": {
"feature": {
"field": "url_length"
"bool": {
"must": [
{
"match": {
"content": "2016"
}
}
],
"should": [
{
"feature": {
"field": "pagerank"
}
},
{
"feature": {
"field": "url_length",
"boost": 0.1
}
},
{
"feature": {
"field": "topics.sports",
"boost": 0.4
}
}
]
}
}
}

View File

@ -165,8 +165,7 @@ public class FeatureFieldMapper extends FieldMapper {
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
failIfNoDocValues();
return new DocValuesIndexFieldData.Builder();
throw new UnsupportedOperationException("[feature] fields do not support sorting, scripting or aggregating");
}
@Override
@ -229,10 +228,6 @@ public class FeatureFieldMapper extends FieldMapper {
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
super.doXContentBody(builder, includeDefaults, params);
if (includeDefaults || fieldType().nullValue() != null) {
builder.field("null_value", fieldType().nullValue());
}
if (includeDefaults || fieldType().positiveScoreImpact() == false) {
builder.field("positive_score_impact", fieldType().positiveScoreImpact());
}

View File

@ -0,0 +1,182 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.document.FeatureField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.query.QueryShardContext;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
* A {@link FieldMapper} that exposes Lucene's {@link FeatureField} as a sparse
* vector of features.
*/
public class FeatureVectorFieldMapper extends FieldMapper {
public static final String CONTENT_TYPE = "feature_vector";
public static class Defaults {
public static final MappedFieldType FIELD_TYPE = new FeatureVectorFieldType();
static {
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setIndexOptions(IndexOptions.NONE);
FIELD_TYPE.setHasDocValues(false);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}
}
public static class Builder extends FieldMapper.Builder<Builder, FeatureVectorFieldMapper> {
public Builder(String name) {
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
builder = this;
}
@Override
public FeatureVectorFieldType fieldType() {
return (FeatureVectorFieldType) super.fieldType();
}
@Override
public FeatureVectorFieldMapper build(BuilderContext context) {
setupFieldType(context);
return new FeatureVectorFieldMapper(
name, fieldType, defaultFieldType,
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
}
}
public static class TypeParser implements Mapper.TypeParser {
@Override
public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
FeatureVectorFieldMapper.Builder builder = new FeatureVectorFieldMapper.Builder(name);
return builder;
}
}
public static final class FeatureVectorFieldType extends MappedFieldType {
public FeatureVectorFieldType() {
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
}
protected FeatureVectorFieldType(FeatureVectorFieldType ref) {
super(ref);
}
public FeatureVectorFieldType clone() {
return new FeatureVectorFieldType(this);
}
@Override
public String typeName() {
return CONTENT_TYPE;
}
@Override
public Query existsQuery(QueryShardContext context) {
throw new UnsupportedOperationException("[feature_vector] fields do not support [exists] queries");
}
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
throw new UnsupportedOperationException("[feature_vector] fields do not support sorting, scripting or aggregating");
}
@Override
public Query termQuery(Object value, QueryShardContext context) {
throw new UnsupportedOperationException("Queries on [feature_vector] fields are not supported");
}
}
private FeatureVectorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
}
@Override
protected FeatureVectorFieldMapper clone() {
return (FeatureVectorFieldMapper) super.clone();
}
@Override
public FeatureVectorFieldType fieldType() {
return (FeatureVectorFieldType) super.fieldType();
}
@Override
public FieldMapper parse(ParseContext context) throws IOException {
if (context.externalValueSet()) {
throw new IllegalArgumentException("[feature_vector] fields can't be used in multi-fields");
}
if (context.parser().currentToken() != Token.START_OBJECT) {
throw new IllegalArgumentException("[feature_vector] fields must be json objects, expected a START_OBJECT but got: " +
context.parser().currentToken());
}
String feature = null;
for (Token token = context.parser().nextToken(); token != Token.END_OBJECT; token = context.parser().nextToken()) {
if (token == Token.FIELD_NAME) {
feature = context.parser().currentName();
} else if (token == Token.VALUE_NULL) {
// ignore feature, this is consistent with numeric fields
} else if (token == Token.VALUE_NUMBER || token == Token.VALUE_STRING) {
final String key = name() + "." + feature;
float value = context.parser().floatValue(true);
if (context.doc().getByKey(key) != null) {
throw new IllegalArgumentException("[feature_vector] fields do not support indexing multiple values for the same " +
"feature [" + key + "] in the same document");
}
context.doc().addWithKey(key, new FeatureField(name(), feature, value));
} else {
throw new IllegalArgumentException("[feature_vector] fields take hashes that map a feature to a strictly positive " +
"float, but got unexpected token " + token);
}
}
return null; // no mapping update
}
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
throw new AssertionError("parse is implemented directly");
}
@Override
protected String contentType() {
return CONTENT_TYPE;
}
}

View File

@ -38,6 +38,7 @@ public class MapperExtrasPlugin extends Plugin implements MapperPlugin, SearchPl
mappers.put(ScaledFloatFieldMapper.CONTENT_TYPE, new ScaledFloatFieldMapper.TypeParser());
mappers.put(TokenCountFieldMapper.CONTENT_TYPE, new TokenCountFieldMapper.TypeParser());
mappers.put(FeatureFieldMapper.CONTENT_TYPE, new FeatureFieldMapper.TypeParser());
mappers.put(FeatureVectorFieldMapper.CONTENT_TYPE, new FeatureVectorFieldMapper.TypeParser());
return Collections.unmodifiableMap(mappers);
}

View File

@ -28,6 +28,8 @@ import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.FeatureFieldMapper.FeatureFieldType;
import org.elasticsearch.index.mapper.FeatureMetaFieldMapper;
import org.elasticsearch.index.mapper.FeatureVectorFieldMapper.FeatureVectorFieldType;
import org.elasticsearch.index.mapper.MappedFieldType;
import java.io.IOException;
@ -48,7 +50,7 @@ public final class FeatureQueryBuilder extends AbstractQueryBuilder<FeatureQuery
abstract void writeTo(StreamOutput out) throws IOException;
abstract Query toQuery(String feature, boolean positiveScoreImpact) throws IOException;
abstract Query toQuery(String field, String feature, boolean positiveScoreImpact) throws IOException;
abstract void doXContent(XContentBuilder builder) throws IOException;
@ -102,12 +104,12 @@ public final class FeatureQueryBuilder extends AbstractQueryBuilder<FeatureQuery
}
@Override
Query toQuery(String feature, boolean positiveScoreImpact) throws IOException {
Query toQuery(String field, String feature, boolean positiveScoreImpact) throws IOException {
if (positiveScoreImpact == false) {
throw new IllegalArgumentException("Cannot use the [log] function with a field that has a negative score impact as " +
"it would trigger negative scores");
}
return FeatureField.newLogQuery("_feature", feature, DEFAULT_BOOST, scalingFactor);
return FeatureField.newLogQuery(field, feature, DEFAULT_BOOST, scalingFactor);
}
}
@ -173,11 +175,11 @@ public final class FeatureQueryBuilder extends AbstractQueryBuilder<FeatureQuery
}
@Override
Query toQuery(String feature, boolean positiveScoreImpact) throws IOException {
Query toQuery(String field, String feature, boolean positiveScoreImpact) throws IOException {
if (pivot == null) {
return FeatureField.newSaturationQuery("_feature", feature);
return FeatureField.newSaturationQuery(field, feature);
} else {
return FeatureField.newSaturationQuery("_feature", feature, DEFAULT_BOOST, pivot);
return FeatureField.newSaturationQuery(field, feature, DEFAULT_BOOST, pivot);
}
}
}
@ -238,8 +240,8 @@ public final class FeatureQueryBuilder extends AbstractQueryBuilder<FeatureQuery
}
@Override
Query toQuery(String feature, boolean positiveScoreImpact) throws IOException {
return FeatureField.newSigmoidQuery("_feature", feature, DEFAULT_BOOST, pivot, exp);
Query toQuery(String field, String feature, boolean positiveScoreImpact) throws IOException {
return FeatureField.newSigmoidQuery(field, feature, DEFAULT_BOOST, pivot, exp);
}
}
}
@ -331,14 +333,24 @@ public final class FeatureQueryBuilder extends AbstractQueryBuilder<FeatureQuery
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
final MappedFieldType ft = context.fieldMapper(field);
if (ft == null) {
return new MatchNoDocsQuery();
if (ft instanceof FeatureFieldType) {
final FeatureFieldType fft = (FeatureFieldType) ft;
return scoreFunction.toQuery(FeatureMetaFieldMapper.NAME, field, fft.positiveScoreImpact());
} else if (ft == null) {
final int lastDotIndex = field.lastIndexOf('.');
if (lastDotIndex != -1) {
final String parentField = field.substring(0, lastDotIndex);
final MappedFieldType parentFt = context.fieldMapper(parentField);
if (parentFt instanceof FeatureVectorFieldType) {
return scoreFunction.toQuery(parentField, field.substring(lastDotIndex + 1), true);
}
}
return new MatchNoDocsQuery(); // unmapped field
} else {
throw new IllegalArgumentException("[feature] query only works on [feature] fields and features of [feature_vector] fields, " +
"not [" + ft.typeName() + "]");
}
if (ft instanceof FeatureFieldType == false) {
throw new IllegalArgumentException("[feature] query only works on [feature] fields, not [" + ft.typeName() + "]");
}
final FeatureFieldType fft = (FeatureFieldType) ft;
return scoreFunction.toQuery(field, fft.positiveScoreImpact());
}
@Override

View File

@ -35,6 +35,7 @@ import org.hamcrest.Matchers;
import org.junit.Before;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
public class FeatureFieldMapperTests extends ESSingleNodeTestCase {
@ -53,7 +54,7 @@ public class FeatureFieldMapperTests extends ESSingleNodeTestCase {
return pluginList(MapperExtrasPlugin.class);
}
private static int getFrequency(TokenStream tk) throws IOException {
static int getFrequency(TokenStream tk) throws IOException {
TermFrequencyAttribute freqAttribute = tk.addAttribute(TermFrequencyAttribute.class);
tk.reset();
assertTrue(tk.incrementToken());
@ -143,7 +144,7 @@ public class FeatureFieldMapperTests extends ESSingleNodeTestCase {
assertEquals(mapping, mapper.mappingSource().toString());
MapperParsingException e = null;/*expectThrows(MapperParsingException.class,
MapperParsingException e = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
.bytes(XContentFactory.jsonBuilder()
.startObject()
@ -151,7 +152,7 @@ public class FeatureFieldMapperTests extends ESSingleNodeTestCase {
.endObject()),
XContentType.JSON)));
assertEquals("[feature] fields do not support indexing multiple values for the same field [field] in the same document",
e.getCause().getMessage());*/
e.getCause().getMessage());
e = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", BytesReference

View File

@ -0,0 +1,130 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.document.FeatureField;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.hamcrest.Matchers;
import org.junit.Before;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
public class FeatureVectorFieldMapperTests extends ESSingleNodeTestCase {
IndexService indexService;
DocumentMapperParser parser;
@Before
public void setup() {
indexService = createIndex("test");
parser = indexService.mapperService().documentMapperParser();
}
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return pluginList(MapperExtrasPlugin.class);
}
public void testDefaults() throws Exception {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "feature_vector").endObject().endObject()
.endObject().endObject());
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc1 = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
.bytes(XContentFactory.jsonBuilder()
.startObject()
.startObject("field")
.field("foo", 10)
.field("bar", 20)
.endObject()
.endObject()),
XContentType.JSON));
IndexableField[] fields = doc1.rootDoc().getFields("field");
assertEquals(2, fields.length);
assertThat(fields[0], Matchers.instanceOf(FeatureField.class));
FeatureField featureField1 = (FeatureField) fields[0];
assertThat(featureField1.stringValue(), Matchers.equalTo("foo"));
FeatureField featureField2 = (FeatureField) fields[1];
assertThat(featureField2.stringValue(), Matchers.equalTo("bar"));
int freq1 = FeatureFieldMapperTests.getFrequency(featureField1.tokenStream(null, null));
int freq2 = FeatureFieldMapperTests.getFrequency(featureField2.tokenStream(null, null));
assertTrue(freq1 < freq2);
}
public void testRejectMultiValuedFields() throws MapperParsingException, IOException {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "feature_vector").endObject().startObject("foo")
.startObject("properties").startObject("field").field("type", "feature_vector").endObject().endObject()
.endObject().endObject().endObject().endObject());
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
MapperParsingException e = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
.bytes(XContentFactory.jsonBuilder()
.startObject()
.startObject("field")
.field("foo", Arrays.asList(10, 20))
.endObject()
.endObject()),
XContentType.JSON)));
assertEquals("[feature_vector] fields take hashes that map a feature to a strictly positive float, but got unexpected token " +
"START_ARRAY", e.getCause().getMessage());
e = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
.bytes(XContentFactory.jsonBuilder()
.startObject()
.startArray("foo")
.startObject()
.startObject("field")
.field("bar", 10)
.endObject()
.endObject()
.startObject()
.startObject("field")
.field("bar", 20)
.endObject()
.endObject()
.endArray()
.endObject()),
XContentType.JSON)));
assertEquals("[feature_vector] fields do not support indexing multiple values for the same feature [foo.field.bar] in the same " +
"document", e.getCause().getMessage());
}
}

View File

@ -0,0 +1,29 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper;
public class FeatureVectorFieldTypeTests extends FieldTypeTestCase {
@Override
protected MappedFieldType createDefaultFieldType() {
return new FeatureVectorFieldMapper.FeatureVectorFieldType();
}
}

View File

@ -33,8 +33,10 @@ import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.test.AbstractQueryTestCase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.Matchers.either;
@ -46,7 +48,8 @@ public class FeatureQueryBuilderTests extends AbstractQueryTestCase<FeatureQuery
for (String type : getCurrentTypes()) {
mapperService.merge(type, new CompressedXContent(Strings.toString(PutMappingRequest.buildFromSimplifiedDef(type,
"my_feature_field", "type=feature",
"my_negative_feature_field", "type=feature,positive_score_impact=false"))), MapperService.MergeReason.MAPPING_UPDATE);
"my_negative_feature_field", "type=feature,positive_score_impact=false",
"my_feature_vector_field", "type=feature_vector"))), MapperService.MergeReason.MAPPING_UPDATE);
}
}
@ -58,8 +61,10 @@ public class FeatureQueryBuilderTests extends AbstractQueryTestCase<FeatureQuery
@Override
protected FeatureQueryBuilder doCreateTestQueryBuilder() {
ScoreFunction function;
boolean mayUseNegativeField = true;
switch (random().nextInt(3)) {
case 0:
mayUseNegativeField = false;
function = new ScoreFunction.Log(1 + randomFloat());
break;
case 1:
@ -75,7 +80,16 @@ public class FeatureQueryBuilderTests extends AbstractQueryTestCase<FeatureQuery
default:
throw new AssertionError();
}
return new FeatureQueryBuilder("my_feature_field", function);
List<String> fields = new ArrayList<>();
fields.add("my_feature_field");
fields.add("unmapped_field");
fields.add("my_feature_vector_field.feature");
if (mayUseNegativeField) {
fields.add("my_negative_feature_field");
}
final String field = randomFrom(fields);
return new FeatureQueryBuilder(field, function);
}
@Override
@ -103,7 +117,7 @@ public class FeatureQueryBuilderTests extends AbstractQueryTestCase<FeatureQuery
" }\n" +
"}";
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> parseQuery(query).toQuery(createShardContext()));
assertEquals("[feature] query only works on [feature] fields, not [text]", e.getMessage());
assertEquals("[feature] query only works on [feature] fields and features of [feature_vector] fields, not [text]", e.getMessage());
}
public void testIllegalCombination() throws IOException {

View File

@ -0,0 +1,103 @@
setup:
- skip:
version: " - 6.99.99"
reason: "The feature_vector field was introduced in 7.0.0"
- do:
indices.create:
index: test
body:
settings:
number_of_replicas: 0
mappings:
_doc:
properties:
tags:
type: feature_vector
- do:
index:
index: test
type: _doc
id: 1
body:
tags:
foo: 3
bar: 5
- do:
index:
index: test
type: _doc
id: 2
body:
tags:
bar: 6
quux: 10
- do:
indices.refresh: {}
---
"Log":
- do:
search:
body:
query:
feature:
field: tags.bar
log:
scaling_factor: 3
- match:
hits.total: 2
- match:
hits.hits.0._id: "2"
- match:
hits.hits.1._id: "1"
---
"Saturation":
- do:
search:
body:
query:
feature:
field: tags.bar
saturation:
pivot: 20
- match:
hits.total: 2
- match:
hits.hits.0._id: "2"
- match:
hits.hits.1._id: "1"
---
"Sigmoid":
- do:
search:
body:
query:
feature:
field: tags.bar
sigmoid:
pivot: 20
exponent: 0.6
- match:
hits.total: 2
- match:
hits.hits.0._id: "2"
- match:
hits.hits.1._id: "1"