Merge pull request #18183 from brwe/exclude-all-but-text-from-wildcard-highlighting

Exclude all but string fields from highlighting if wildcards are used…
This commit is contained in:
Britta Weber 2016-05-06 16:57:25 +02:00
commit 901c25b7ff
6 changed files with 364 additions and 3 deletions

View File

@ -26,6 +26,9 @@ import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.core.KeywordFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.core.TextFieldMapper;
import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.FetchSubPhase;
@ -102,6 +105,21 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
continue;
}
// We should prevent highlighting if a field is anything but a text or keyword field.
// However, someone might implement a custom field type that has text and still want to
// highlight on that. We cannot know in advance if the highlighter will be able to
// highlight such a field and so we do the following:
// If the field is only highlighted because the field matches a wildcard we assume
// it was a mistake and do not process it.
// If the field was explicitly given we assume that whoever issued the query knew
// what they were doing and try to highlight anyway.
if (fieldNameContainsWildcards) {
if (fieldMapper.fieldType().typeName().equals(TextFieldMapper.CONTENT_TYPE) == false &&
fieldMapper.fieldType().typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false &&
fieldMapper.fieldType().typeName().equals(StringFieldMapper.CONTENT_TYPE) == false) {
continue;
}
}
String highlighterType = field.fieldOptions().highlighterType();
if (highlighterType == null) {
for(String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) {

View File

@ -43,6 +43,7 @@ public class ExternalMapperPlugin extends Plugin {
indicesModule.registerMapper(EXTERNAL, new ExternalMapper.TypeParser(EXTERNAL, "foo"));
indicesModule.registerMapper(EXTERNAL_BIS, new ExternalMapper.TypeParser(EXTERNAL_BIS, "bar"));
indicesModule.registerMapper(EXTERNAL_UPPER, new ExternalMapper.TypeParser(EXTERNAL_UPPER, "FOO BAR"));
indicesModule.registerMapper(FakeStringFieldMapper.CONTENT_TYPE, new FakeStringFieldMapper.TypeParser());
}
}
}

View File

@ -25,10 +25,12 @@ import org.elasticsearch.common.geo.builders.ShapeBuilders;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import java.util.Collection;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.equalTo;
public class ExternalValuesMapperIntegrationIT extends ESIntegTestCase {
@ -37,6 +39,54 @@ public class ExternalValuesMapperIntegrationIT extends ESIntegTestCase {
return pluginList(ExternalMapperPlugin.class);
}
public void testHighlightingOnCustomString() throws Exception {
prepareCreate("test-idx").addMapping("type",
XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field").field("type", FakeStringFieldMapper.CONTENT_TYPE).endObject()
.endObject()
.endObject().endObject()).execute().get();
ensureYellow("test-idx");
index("test-idx", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("field", "Every day is exactly the same")
.endObject());
refresh();
SearchResponse response;
// test if the highlighting is excluded when we use wildcards
response = client().prepareSearch("test-idx")
.setQuery(QueryBuilders.matchQuery("field", "exactly the same"))
.highlighter(new HighlightBuilder().field("*"))
.execute().actionGet();
assertSearchResponse(response);
assertThat(response.getHits().getTotalHits(), equalTo(1L));
assertThat(response.getHits().getAt(0).getHighlightFields().size(), equalTo(0));
// make sure it is not excluded when we explicitly provide the fieldname
response = client().prepareSearch("test-idx")
.setQuery(QueryBuilders.matchQuery("field", "exactly the same"))
.highlighter(new HighlightBuilder().field("field"))
.execute().actionGet();
assertSearchResponse(response);
assertThat(response.getHits().getTotalHits(), equalTo(1L));
assertThat(response.getHits().getAt(0).getHighlightFields().size(), equalTo(1));
assertThat(response.getHits().getAt(0).getHighlightFields().get("field").fragments()[0].string(), equalTo("Every day is " +
"<em>exactly</em> <em>the</em> <em>same</em>"));
// make sure it is not excluded when we explicitly provide the fieldname and a wildcard
response = client().prepareSearch("test-idx")
.setQuery(QueryBuilders.matchQuery("field", "exactly the same"))
.highlighter(new HighlightBuilder().field("*").field("field"))
.execute().actionGet();
assertSearchResponse(response);
assertThat(response.getHits().getTotalHits(), equalTo(1L));
assertThat(response.getHits().getAt(0).getHighlightFields().size(), equalTo(1));
assertThat(response.getHits().getAt(0).getHighlightFields().get("field").fragments()[0].string(), equalTo("Every day is " +
"<em>exactly</em> <em>the</em> <em>same</em>"));
}
public void testExternalValues() throws Exception {
prepareCreate("test-idx").addMapping("type",
XContentFactory.jsonBuilder().startObject().startObject("type")

View File

@ -0,0 +1,193 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.externalvalues;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.query.QueryShardContext;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseTextField;
// Like a String mapper but with very few options. We just use it to test if highlighting on a custom string mapped field works as expected.
public class FakeStringFieldMapper extends FieldMapper {
public static final String CONTENT_TYPE = "fake_string";
public static class Defaults {
public static final MappedFieldType FIELD_TYPE = new FakeStringFieldType();
static {
FIELD_TYPE.freeze();
}
}
public static class Builder extends FieldMapper.Builder<Builder, FakeStringFieldMapper> {
public Builder(String name) {
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
builder = this;
}
@Override
public FakeStringFieldType fieldType() {
return (FakeStringFieldType) super.fieldType();
}
@Override
public FakeStringFieldMapper build(BuilderContext context) {
setupFieldType(context);
return new FakeStringFieldMapper(
name, fieldType(), defaultFieldType,
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
}
}
public static class TypeParser implements Mapper.TypeParser {
public TypeParser() {
}
@Override
public Mapper.Builder parse(String fieldName, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
FakeStringFieldMapper.Builder builder = new FakeStringFieldMapper.Builder(fieldName);
parseTextField(builder, fieldName, node, parserContext);
return builder;
}
}
public static final class FakeStringFieldType extends MappedFieldType {
public FakeStringFieldType() {
}
protected FakeStringFieldType(FakeStringFieldType ref) {
super(ref);
}
public FakeStringFieldType clone() {
return new FakeStringFieldType(this);
}
@Override
public String typeName() {
return CONTENT_TYPE;
}
@Override
public Query nullValueQuery() {
if (nullValue() == null) {
return null;
}
return termQuery(nullValue(), null);
}
@Override
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, @Nullable MultiTermQuery.RewriteMethod method,
@Nullable QueryShardContext context) {
RegexpQuery query = new RegexpQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates);
if (method != null) {
query.setRewriteMethod(method);
}
return query;
}
}
protected FakeStringFieldMapper(String simpleName, FakeStringFieldType fieldType, MappedFieldType defaultFieldType,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
}
@Override
protected StringFieldMapper clone() {
return (StringFieldMapper) super.clone();
}
@Override
protected boolean customBoost() {
return true;
}
@Override
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
StringFieldMapper.ValueAndBoost valueAndBoost = parseCreateFieldForString(context, fieldType().boost());
if (valueAndBoost.value() == null) {
return;
}
if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
Field field = new Field(fieldType().name(), valueAndBoost.value(), fieldType());
fields.add(field);
}
if (fieldType().hasDocValues()) {
fields.add(new SortedSetDocValuesField(fieldType().name(), new BytesRef(valueAndBoost.value())));
}
}
public static StringFieldMapper.ValueAndBoost parseCreateFieldForString(ParseContext context, float defaultBoost) throws IOException {
if (context.externalValueSet()) {
return new StringFieldMapper.ValueAndBoost(context.externalValue().toString(), defaultBoost);
}
XContentParser parser = context.parser();
return new StringFieldMapper.ValueAndBoost(parser.textOrNull(), defaultBoost);
}
@Override
protected String contentType() {
return CONTENT_TYPE;
}
@Override
protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
super.doMerge(mergeWith, updateAllTypes);
}
@Override
public FakeStringFieldType fieldType() {
return (FakeStringFieldType) super.fieldType();
}
@Override
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
super.doXContentBody(builder, includeDefaults, params);
doXContentAnalyzers(builder, includeDefaults);
}
}

View File

@ -19,10 +19,11 @@
package org.elasticsearch.search.highlight;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.elasticsearch.Version;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.Settings.Builder;
import org.elasticsearch.common.xcontent.XContentBuilder;
@ -36,15 +37,18 @@ import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.search.MatchQuery.Type;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder.Field;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
import org.hamcrest.Matcher;
import org.hamcrest.Matchers;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
@ -85,6 +89,12 @@ import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.startsWith;
public class HighlighterSearchIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return pluginList(InternalSettingsPlugin.class);
}
public void testHighlightingWithWildcardName() throws IOException {
// test the kibana case with * as fieldname that will try highlight all fields including meta fields
XContentBuilder mappings = jsonBuilder();
@ -2542,4 +2552,90 @@ public class HighlighterSearchIT extends ESIntegTestCase {
response = search.setQuery(boostingQuery(phrase, terms).boost(1).negativeBoost(1/boost)).get();
assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher);
}
public void testGeoFieldHighlighting() throws IOException {
// check that we do not get an exception for geo_point fields in case someone tries to highlight
// it accidential with a wildcard
// see https://github.com/elastic/elasticsearch/issues/17537
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("properties")
.startObject("geo_point")
.field("type", "geo_point")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
ensureYellow();
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("geo_point", "60.12,100.34").endObject())
.get();
refresh();
SearchResponse search = client().prepareSearch().setSource(
new SearchSourceBuilder().query(QueryBuilders.geoBoundingBoxQuery("geo_point").setCorners(61.10078883158897, -170.15625,
-64.92354174306496, 118.47656249999999)).highlighter(new HighlightBuilder().field("*"))).get();
assertNoFailures(search);
assertThat(search.getHits().totalHits(), equalTo(1L));
}
public void testKeywordFieldHighlighting() throws IOException {
// check that keyword highlighting works
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("properties")
.startObject("keyword_field")
.field("type", "keyword")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
ensureYellow();
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("keyword_field", "some text").endObject())
.get();
refresh();
SearchResponse search = client().prepareSearch().setSource(
new SearchSourceBuilder().query(QueryBuilders.matchQuery("keyword_field", "some text")).highlighter(new HighlightBuilder().field("*")))
.get();
assertNoFailures(search);
assertThat(search.getHits().totalHits(), equalTo(1L));
assertThat(search.getHits().getAt(0).getHighlightFields().get("keyword_field").getFragments()[0].string(), equalTo("<em>some text</em>"));
}
public void testStringFieldHighlighting() throws IOException {
// check that string field highlighting on old indexes works
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("properties")
.startObject("string_field")
.field("type", "string")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings)
.setSettings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_3_2)));
ensureYellow();
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("string_field", "some text").endObject())
.get();
refresh();
SearchResponse search = client().prepareSearch().setSource(
new SearchSourceBuilder().query(QueryBuilders.matchQuery("string_field", "some text")).highlighter(new HighlightBuilder().field("*")))
.get();
assertNoFailures(search);
assertThat(search.getHits().totalHits(), equalTo(1L));
assertThat(search.getHits().getAt(0).getHighlightFields().get("string_field").getFragments()[0].string(), equalTo("<em>some</em> <em>text</em>"));
}
}

View File

@ -35,7 +35,10 @@ be used for highlighting if it mapped to have `store` set to `true`.
==================================
The field name supports wildcard notation. For example, using `comment_*`
will cause all fields that match the expression to be highlighted.
will cause all <<text,text>> and <<keyword,keyword>> fields (and <<string,string>>
from versions before 5.0) that match the expression to be highlighted.
Note that all other fields will not be highlighted. If you use a custom mapper and want to
highlight on a field anyway, you have to provide the field name explicitly.
[[plain-highlighter]]
==== Plain highlighter