Highlighting: Automatically use the field values extracted from _source if not stored explicitly in the mapping, closes #561.
parent fe3f5d45de
commit 216b2ab912
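In client terms, the change means a field mapped with store set to "no" (but indexed with term vectors, positions and offsets) can now be highlighted: the highlighter rebuilds the field values from the hit's _source. A minimal sketch of such a request, mirroring the integration test added at the bottom of this diff; the wrapper class, method name and the idea of passing in an existing Client are illustrative only, while the individual API calls are the ones the test itself uses:

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.SearchHit;

import static org.elasticsearch.index.query.xcontent.QueryBuilders.fieldQuery;

// Sketch only: the wrapper class and method are hypothetical; the calls below
// are taken from the SourceFieldHighlightingTests added in this commit.
public class SourceHighlightingSketch {

    public static void printTitleHighlights(Client client) {
        // "title" is mapped with store=no and term_vector=with_positions_offsets;
        // its highlight fragments are now rebuilt from the document _source.
        SearchResponse response = client.prepareSearch()
                .setQuery(fieldQuery("title", "bug"))
                .addHighlightedField("title", -1, 0)
                .execute().actionGet();

        for (SearchHit hit : response.hits()) {
            System.out.println(hit.highlightFields().get("title").fragments()[0]);
        }
    }
}

The full end-to-end check is in SourceFieldHighlightingTests at the bottom of this diff.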
HighlightPhase.java
@@ -27,9 +27,12 @@ import org.elasticsearch.common.collect.ImmutableMap;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.Uid;
+import org.elasticsearch.search.SearchException;
 import org.elasticsearch.search.SearchParseElement;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
 import org.elasticsearch.search.fetch.SearchHitPhase;
+import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragmentsBuilder;
+import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
 import org.elasticsearch.search.internal.InternalSearchHit;
 import org.elasticsearch.search.internal.SearchContext;
 
@@ -57,20 +60,19 @@ public class HighlightPhase implements SearchHitPhase {
 
         Map<String, HighlightField> highlightFields = newHashMap();
         for (SearchContextHighlight.Field field : context.highlight().fields()) {
-            String fieldName = field.field();
             FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(field.field());
-            if (mapper != null) {
-                fieldName = mapper.names().indexName();
+            if (mapper == null) {
+                throw new SearchException(context.shardTarget(), "No mapping found for [" + field.field() + "]");
             }
 
-            FastVectorHighlighter highlighter = buildHighlighter(field);
+            FastVectorHighlighter highlighter = buildHighlighter(context, mapper, field);
             FieldQuery fieldQuery = buildFieldQuery(highlighter, context.query(), reader, field);
 
             String[] fragments;
             try {
                 // a HACK to make highlighter do highlighting, even though its using the single frag list builder
                 int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
-                fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, fieldName, field.fragmentCharSize(), numberOfFragments);
+                fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments);
             } catch (IOException e) {
                 throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
             }
@@ -91,18 +93,30 @@ public class HighlightPhase implements SearchHitPhase {
         return new CustomFieldQuery(query, highlighter);
     }
 
-    private FastVectorHighlighter buildHighlighter(SearchContextHighlight.Field field) {
+    private FastVectorHighlighter buildHighlighter(SearchContext searchContext, FieldMapper fieldMapper, SearchContextHighlight.Field field) {
         FragListBuilder fragListBuilder;
         FragmentsBuilder fragmentsBuilder;
         if (field.numberOfFragments() == 0) {
             fragListBuilder = new SingleFragListBuilder();
+            if (fieldMapper.stored()) {
                 fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
+            } else {
+                fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
+            }
         } else {
             fragListBuilder = new SimpleFragListBuilder();
             if (field.scoreOrdered()) {
+                if (fieldMapper.stored()) {
                     fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
+                } else {
+                    fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
+                }
             } else {
+                if (fieldMapper.stored()) {
                     fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
+                } else {
+                    fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
+                }
             }
         }
 
SourceScoreOrderFragmentsBuilder.java (new file)
@@ -0,0 +1,61 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.highlight.vectorhighlight;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {
+
+    private final FieldMapper mapper;
+
+    private final SearchContext searchContext;
+
+    public SourceScoreOrderFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
+                                            String[] preTags, String[] postTags) {
+        super(preTags, postTags);
+        this.mapper = mapper;
+        this.searchContext = searchContext;
+    }
+
+    @Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
+        // we know its low level reader, and matching docId, since that's how we call the highlighter with
+        SearchLookup lookup = searchContext.lookup();
+        lookup.setNextReader(reader);
+        lookup.setNextDocId(docId);
+
+        List<Object> values = lookup.source().getValues(mapper.names().fullName());
+        Field[] fields = new Field[values.size()];
+        for (int i = 0; i < values.size(); i++) {
+            fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED);
+        }
+        return fields;
+    }
+}
SourceSimpleFragmentsBuilder.java (new file)
@@ -0,0 +1,67 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.highlight.vectorhighlight;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {
+
+    private final FieldMapper mapper;
+
+    private final SearchContext searchContext;
+
+    public SourceSimpleFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
+                                        String[] preTags, String[] postTags) {
+        super(preTags, postTags);
+        this.mapper = mapper;
+        this.searchContext = searchContext;
+    }
+
+    public static Field[] EMPTY_FIELDS = new Field[0];
+
+    @Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
+        // we know its low level reader, and matching docId, since that's how we call the highlighter with
+        SearchLookup lookup = searchContext.lookup();
+        lookup.setNextReader(reader);
+        lookup.setNextDocId(docId);
+
+        List<Object> values = lookup.source().getValues(mapper.names().fullName());
+        if (values.isEmpty()) {
+            return EMPTY_FIELDS;
+        }
+        Field[] fields = new Field[values.size()];
+        for (int i = 0; i < values.size(); i++) {
+            fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED);
+        }
+        return fields;
+    }
+
+}
SourceLookup.java
@@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
 import org.elasticsearch.ElasticSearchParseException;
+import org.elasticsearch.common.collect.Lists;
 import org.elasticsearch.common.compress.lzf.LZFDecoder;
 import org.elasticsearch.common.io.stream.BytesStreamInput;
 import org.elasticsearch.common.io.stream.CachedStreamInput;
@@ -34,12 +35,15 @@ import org.elasticsearch.index.mapper.SourceFieldMapper;
 import org.elasticsearch.index.mapper.SourceFieldSelector;
 
 import java.util.Collection;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 /**
  * @author kimchy (shay.banon)
  */
+// TODO: If we are processing it in the per hit fetch phase, we cna initialize it with a source if it was loaded..
 public class SourceLookup implements Map {
 
     private IndexReader reader;
@@ -99,6 +103,54 @@ public class SourceLookup implements Map {
         this.source = null;
     }
 
+    private final static Pattern dotPattern = Pattern.compile("\\.");
+
+    /**
+     * Returns the values associated with the path. Those are "low" level values, and it can
+     * handle path expression where an array/list is navigated within.
+     */
+    public List<Object> getValues(String path) {
+        List<Object> values = Lists.newArrayList();
+        String[] pathElements = dotPattern.split(path);
+        getValues(values, loadSourceIfNeeded(), pathElements, 0);
+        return values;
+    }
+
+    @SuppressWarnings({"unchecked"})
+    private void getValues(List<Object> values, Map<String, Object> part, String[] pathElements, int index) {
+        if (index == pathElements.length) {
+            return;
+        }
+        String currentPath = pathElements[index];
+        Object currentValue = part.get(currentPath);
+        if (currentValue == null) {
+            return;
+        }
+        if (currentValue instanceof Map) {
+            getValues(values, (Map<String, Object>) currentValue, pathElements, index + 1);
+        } else if (currentValue instanceof List) {
+            getValues(values, (List<Object>) currentValue, pathElements, index + 1);
+        } else {
+            values.add(currentValue);
+        }
+    }
+
+    @SuppressWarnings({"unchecked"})
+    private void getValues(List<Object> values, List<Object> part, String[] pathElements, int index) {
+        for (Object value : part) {
+            if (value == null) {
+                continue;
+            }
+            if (value instanceof Map) {
+                getValues(values, (Map<String, Object>) value, pathElements, index);
+            } else if (value instanceof List) {
+                getValues(values, (List<Object>) value, pathElements, index);
+            } else {
+                values.add(value);
+            }
+        }
+    }
+
     @Override public Object get(Object key) {
         return loadSourceIfNeeded().get(key);
     }
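The getValues addition above walks a dotted path through the parsed _source, descending one path element per nested object and fanning out over arrays without consuming an element. A standalone sketch of that traversal, assuming a document shaped like the one indexed by the test below; the class and helper names here are hypothetical and not part of the commit:

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative sketch of the dotted-path traversal added to SourceLookup.getValues.
// The map below stands in for a parsed _source document.
public class SourcePathLookupSketch {

    public static void main(String[] args) {
        Map<String, Object> source = new HashMap<String, Object>();
        source.put("title", "This is a test on the highlighting bug present in elasticsearch");
        List<Object> attachments = new ArrayList<Object>();
        attachments.add(Collections.singletonMap("body", "attachment 1"));
        attachments.add(Collections.singletonMap("body", "attachment 2"));
        source.put("attachments", attachments);

        // Walks "attachments" (a list), then "body" on each element:
        // prints [attachment 1, attachment 2]
        System.out.println(getValues(source, "attachments.body"));
    }

    public static List<Object> getValues(Map<String, Object> source, String path) {
        List<Object> values = new ArrayList<Object>();
        collect(values, source, path.split("\\."), 0);
        return values;
    }

    @SuppressWarnings("unchecked")
    private static void collect(List<Object> values, Object part, String[] pathElements, int index) {
        if (part == null) {
            return;
        }
        if (part instanceof Map) {
            // objects consume one path element
            if (index == pathElements.length) {
                return;
            }
            collect(values, ((Map<String, Object>) part).get(pathElements[index]), pathElements, index + 1);
        } else if (part instanceof List) {
            // arrays do not consume a path element; every element is visited
            for (Object item : (List<Object>) part) {
                collect(values, item, pathElements, index);
            }
        } else {
            // leaf value reached
            values.add(part);
        }
    }
}

The resulting list ([attachment 1, attachment 2] in this example) is what the new fragments builders turn into unstored Field instances for the FastVectorHighlighter.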
SourceFieldHighlightingTests.java (new file)
@@ -0,0 +1,104 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.test.integration.search.highlight;
+
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.test.integration.AbstractNodesTests;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.*;
+import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
+import static org.hamcrest.MatcherAssert.*;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class SourceFieldHighlightingTests extends AbstractNodesTests {
+
+    private Client client;
+
+    @BeforeClass public void createNodes() throws Exception {
+        startNode("node1");
+        client = getClient();
+    }
+
+    @AfterClass public void closeNodes() {
+        client.close();
+        closeAllNodes();
+    }
+
+    protected Client getClient() {
+        return client("node1");
+    }
+
+    @Test public void testSourceLookupHighlighting() throws Exception {
+        try {
+            client.admin().indices().prepareDelete("test").execute().actionGet();
+        } catch (Exception e) {
+            // ignore
+        }
+
+        client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
+                .addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
+                        // we don't store title, now lets see if it works...
+                        .startObject("title").field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets").endObject()
+                        .startObject("attachments").startObject("properties").startObject("body").field("type", "string").field("term_vector", "with_positions_offsets").endObject().endObject().endObject()
+                        .endObject().endObject().endObject())
+                .execute().actionGet();
+
+        for (int i = 0; i < 5; i++) {
+            client.prepareIndex("test", "type1", Integer.toString(i))
+                    .setSource(XContentFactory.jsonBuilder().startObject()
+                            .field("title", "This is a test on the highlighting bug present in elasticsearch")
+                            .startArray("attachments").startObject().field("body", "attachment 1").endObject().startObject().field("body", "attachment 2").endObject().endArray()
+                            .endObject())
+                    .setRefresh(true).execute().actionGet();
+        }
+
+        SearchResponse search = client.prepareSearch()
+                .setQuery(fieldQuery("title", "bug"))
+                .addHighlightedField("title", -1, 0)
+                .execute().actionGet();
+
+        assertThat(search.hits().totalHits(), equalTo(5l));
+
+        for (SearchHit hit : search.hits()) {
+            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
+        }
+
+        search = client.prepareSearch()
+                .setQuery(fieldQuery("attachments.body", "attachment"))
+                .addHighlightedField("attachments.body", -1, 0)
+                .execute().actionGet();
+
+        assertThat(search.hits().totalHits(), equalTo(5l));
+
+        for (SearchHit hit : search.hits()) {
+            assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2"));
+        }
+    }
+}