Highlighting: Automatically use the field values extracted from _source if not stored explicitly in the mapping, closes #561.
parent fe3f5d45de
commit 216b2ab912
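
What this enables from the client side: a field mapped with "store" set to "no" (but indexed with term vectors) can now be highlighted, with the highlighter falling back to the values parsed out of _source. A minimal usage sketch along the lines of the integration test added at the bottom of this commit (index, type and field names are illustrative; fieldQuery comes from the same QueryBuilders static import the test uses):

    SearchResponse response = client.prepareSearch("test")
            .setQuery(fieldQuery("title", "bug"))
            // fragment size -1 with 0 fragments asks for the whole field value as a single fragment
            .addHighlightedField("title", -1, 0)
            .execute().actionGet();

    for (SearchHit hit : response.hits()) {
        // the fragment is rebuilt from the value extracted from _source, not from a stored field
        String highlighted = hit.highlightFields().get("title").fragments()[0];
    }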

@@ -27,9 +27,12 @@ import org.elasticsearch.common.collect.ImmutableMap;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.Uid;
+import org.elasticsearch.search.SearchException;
 import org.elasticsearch.search.SearchParseElement;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
 import org.elasticsearch.search.fetch.SearchHitPhase;
+import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragmentsBuilder;
+import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
 import org.elasticsearch.search.internal.InternalSearchHit;
 import org.elasticsearch.search.internal.SearchContext;
 

@@ -57,20 +60,19 @@ public class HighlightPhase implements SearchHitPhase {
 
         Map<String, HighlightField> highlightFields = newHashMap();
         for (SearchContextHighlight.Field field : context.highlight().fields()) {
-            String fieldName = field.field();
             FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(field.field());
-            if (mapper != null) {
-                fieldName = mapper.names().indexName();
+            if (mapper == null) {
+                throw new SearchException(context.shardTarget(), "No mapping found for [" + field.field() + "]");
             }
 
-            FastVectorHighlighter highlighter = buildHighlighter(field);
+            FastVectorHighlighter highlighter = buildHighlighter(context, mapper, field);
             FieldQuery fieldQuery = buildFieldQuery(highlighter, context.query(), reader, field);
 
             String[] fragments;
             try {
                 // a HACK to make highlighter do highlighting, even though its using the single frag list builder
                 int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
-                fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, fieldName, field.fragmentCharSize(), numberOfFragments);
+                fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments);
             } catch (IOException e) {
                 throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
             }

@@ -91,18 +93,30 @@ public class HighlightPhase implements SearchHitPhase {
         return new CustomFieldQuery(query, highlighter);
     }
 
-    private FastVectorHighlighter buildHighlighter(SearchContextHighlight.Field field) {
+    private FastVectorHighlighter buildHighlighter(SearchContext searchContext, FieldMapper fieldMapper, SearchContextHighlight.Field field) {
         FragListBuilder fragListBuilder;
         FragmentsBuilder fragmentsBuilder;
         if (field.numberOfFragments() == 0) {
             fragListBuilder = new SingleFragListBuilder();
-            fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
+            if (fieldMapper.stored()) {
+                fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
+            } else {
+                fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
+            }
         } else {
             fragListBuilder = new SimpleFragListBuilder();
             if (field.scoreOrdered()) {
-                fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
+                if (fieldMapper.stored()) {
+                    fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
+                } else {
+                    fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
+                }
             } else {
-                fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
+                if (fieldMapper.stored()) {
+                    fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
+                } else {
+                    fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
+                }
             }
         }
 
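
The rest of buildHighlighter (below this hunk) hands the selected FragListBuilder/FragmentsBuilder pair to Lucene's FastVectorHighlighter. A rough sketch of how the pieces fit together for a non-stored, whole-field highlight, assuming fieldMapper, searchContext, field, query, reader and docId are in scope as they are in HighlightPhase; the constructor flags and the plain getFieldQuery call are illustrative (the real code builds a CustomFieldQuery instead):

    // sketch only: SingleFragListBuilder plus a source-based fragments builder for a non-stored field
    FragListBuilder fragListBuilder = new SingleFragListBuilder();
    FragmentsBuilder fragmentsBuilder =
            new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());

    FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, fragListBuilder, fragmentsBuilder);
    FieldQuery fieldQuery = highlighter.getFieldQuery(query);
    // ask for a single fragment; combined with SingleFragListBuilder this returns the whole field value
    String[] fragments = highlighter.getBestFragments(fieldQuery, reader, docId,
            fieldMapper.names().indexName(), field.fragmentCharSize(), 1);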

@@ -0,0 +1,61 @@
/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.highlight.vectorhighlight;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.List;

/**
 * @author kimchy (shay.banon)
 */
public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {

    private final FieldMapper mapper;

    private final SearchContext searchContext;

    public SourceScoreOrderFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
                                            String[] preTags, String[] postTags) {
        super(preTags, postTags);
        this.mapper = mapper;
        this.searchContext = searchContext;
    }

    @Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
        // we know its low level reader, and matching docId, since that's how we call the highlighter with
        SearchLookup lookup = searchContext.lookup();
        lookup.setNextReader(reader);
        lookup.setNextDocId(docId);

        List<Object> values = lookup.source().getValues(mapper.names().fullName());
        Field[] fields = new Field[values.size()];
        for (int i = 0; i < values.size(); i++) {
            fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED);
        }
        return fields;
    }
}

@@ -0,0 +1,67 @@
/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.highlight.vectorhighlight;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.List;

/**
 * @author kimchy (shay.banon)
 */
public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {

    private final FieldMapper mapper;

    private final SearchContext searchContext;

    public SourceSimpleFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
                                        String[] preTags, String[] postTags) {
        super(preTags, postTags);
        this.mapper = mapper;
        this.searchContext = searchContext;
    }

    public static Field[] EMPTY_FIELDS = new Field[0];

    @Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
        // we know its low level reader, and matching docId, since that's how we call the highlighter with
        SearchLookup lookup = searchContext.lookup();
        lookup.setNextReader(reader);
        lookup.setNextDocId(docId);

        List<Object> values = lookup.source().getValues(mapper.names().fullName());
        if (values.isEmpty()) {
            return EMPTY_FIELDS;
        }
        Field[] fields = new Field[values.size()];
        for (int i = 0; i < values.size(); i++) {
            fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED);
        }
        return fields;
    }

}

@@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
 import org.elasticsearch.ElasticSearchParseException;
+import org.elasticsearch.common.collect.Lists;
 import org.elasticsearch.common.compress.lzf.LZFDecoder;
 import org.elasticsearch.common.io.stream.BytesStreamInput;
 import org.elasticsearch.common.io.stream.CachedStreamInput;

@@ -34,12 +35,15 @@ import org.elasticsearch.index.mapper.SourceFieldMapper;
 import org.elasticsearch.index.mapper.SourceFieldSelector;
 
 import java.util.Collection;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 /**
  * @author kimchy (shay.banon)
  */
+// TODO: If we are processing it in the per hit fetch phase, we cna initialize it with a source if it was loaded..
 public class SourceLookup implements Map {
 
     private IndexReader reader;

@@ -99,6 +103,54 @@
         this.source = null;
     }
 
+    private final static Pattern dotPattern = Pattern.compile("\\.");
+
+    /**
+     * Returns the values associated with the path. Those are "low" level values, and it can
+     * handle path expression where an array/list is navigated within.
+     */
+    public List<Object> getValues(String path) {
+        List<Object> values = Lists.newArrayList();
+        String[] pathElements = dotPattern.split(path);
+        getValues(values, loadSourceIfNeeded(), pathElements, 0);
+        return values;
+    }
+
+    @SuppressWarnings({"unchecked"})
+    private void getValues(List<Object> values, Map<String, Object> part, String[] pathElements, int index) {
+        if (index == pathElements.length) {
+            return;
+        }
+        String currentPath = pathElements[index];
+        Object currentValue = part.get(currentPath);
+        if (currentValue == null) {
+            return;
+        }
+        if (currentValue instanceof Map) {
+            getValues(values, (Map<String, Object>) currentValue, pathElements, index + 1);
+        } else if (currentValue instanceof List) {
+            getValues(values, (List<Object>) currentValue, pathElements, index + 1);
+        } else {
+            values.add(currentValue);
+        }
+    }
+
+    @SuppressWarnings({"unchecked"})
+    private void getValues(List<Object> values, List<Object> part, String[] pathElements, int index) {
+        for (Object value : part) {
+            if (value == null) {
+                continue;
+            }
+            if (value instanceof Map) {
+                getValues(values, (Map<String, Object>) value, pathElements, index);
+            } else if (value instanceof List) {
+                getValues(values, (List<Object>) value, pathElements, index);
+            } else {
+                values.add(value);
+            }
+        }
+    }
+
     @Override public Object get(Object key) {
         return loadSourceIfNeeded().get(key);
     }
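
To make the path semantics of getValues concrete: a path such as "attachments.body" is walked element by element through the parsed _source, descending into maps and flattening any list it crosses, and every leaf value reached is collected. A self-contained sketch of that behaviour against a plain Map standing in for the parsed source (SourcePathDemo and extractValues are illustrative helpers, not part of SourceLookup):

    import java.util.*;

    public class SourcePathDemo {

        public static void main(String[] args) {
            // stands in for the parsed _source of one of the test documents below
            Map<String, Object> source = new HashMap<String, Object>();
            source.put("title", "This is a test on the highlighting bug present in elasticsearch");
            List<Object> attachments = new ArrayList<Object>();
            attachments.add(Collections.singletonMap("body", "attachment 1"));
            attachments.add(Collections.singletonMap("body", "attachment 2"));
            source.put("attachments", attachments);

            // prints [attachment 1, attachment 2]; the fragments builders turn each value into a Field
            System.out.println(extractValues(source, "attachments.body"));
        }

        // mirrors the recursion in SourceLookup#getValues: descend maps by path element,
        // flatten lists without consuming a path element, collect the leaves
        static List<Object> extractValues(Map<String, Object> source, String path) {
            List<Object> values = new ArrayList<Object>();
            collect(values, source, path.split("\\."), 0);
            return values;
        }

        @SuppressWarnings("unchecked")
        static void collect(List<Object> values, Object current, String[] pathElements, int index) {
            if (current == null) {
                return;
            }
            if (current instanceof List) {
                for (Object item : (List<Object>) current) {
                    collect(values, item, pathElements, index);
                }
            } else if (current instanceof Map) {
                if (index == pathElements.length) {
                    return;
                }
                collect(values, ((Map<String, Object>) current).get(pathElements[index]), pathElements, index + 1);
            } else {
                values.add(current);
            }
        }
    }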

@@ -0,0 +1,104 @@
/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.test.integration.search.highlight;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;

/**
 * @author kimchy (shay.banon)
 */
public class SourceFieldHighlightingTests extends AbstractNodesTests {

    private Client client;

    @BeforeClass public void createNodes() throws Exception {
        startNode("node1");
        client = getClient();
    }

    @AfterClass public void closeNodes() {
        client.close();
        closeAllNodes();
    }

    protected Client getClient() {
        return client("node1");
    }

    @Test public void testSourceLookupHighlighting() throws Exception {
        try {
            client.admin().indices().prepareDelete("test").execute().actionGet();
        } catch (Exception e) {
            // ignore
        }

        client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
                .addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
                        // we don't store title, now lets see if it works...
                        .startObject("title").field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets").endObject()
                        .startObject("attachments").startObject("properties").startObject("body").field("type", "string").field("term_vector", "with_positions_offsets").endObject().endObject().endObject()
                        .endObject().endObject().endObject())
                .execute().actionGet();

        for (int i = 0; i < 5; i++) {
            client.prepareIndex("test", "type1", Integer.toString(i))
                    .setSource(XContentFactory.jsonBuilder().startObject()
                            .field("title", "This is a test on the highlighting bug present in elasticsearch")
                            .startArray("attachments").startObject().field("body", "attachment 1").endObject().startObject().field("body", "attachment 2").endObject().endArray()
                            .endObject())
                    .setRefresh(true).execute().actionGet();
        }

        SearchResponse search = client.prepareSearch()
                .setQuery(fieldQuery("title", "bug"))
                .addHighlightedField("title", -1, 0)
                .execute().actionGet();

        assertThat(search.hits().totalHits(), equalTo(5l));

        for (SearchHit hit : search.hits()) {
            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
        }

        search = client.prepareSearch()
                .setQuery(fieldQuery("attachments.body", "attachment"))
                .addHighlightedField("attachments.body", -1, 0)
                .execute().actionGet();

        assertThat(search.hits().totalHits(), equalTo(5l));

        for (SearchHit hit : search.hits()) {
            assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2"));
        }
    }
}