Highlighting: Automatically use the field values extracted from _source if not stored explicitly in the mapping, closes #561.

This commit is contained in:
kimchy 2010-12-12 04:40:25 +02:00
parent fe3f5d45de
commit 216b2ab912
5 changed files with 307 additions and 9 deletions

View File

@ -27,9 +27,12 @@ import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.search.SearchException;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.SearchHitPhase;
import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragmentsBuilder;
import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.SearchContext;
@ -57,20 +60,19 @@ public class HighlightPhase implements SearchHitPhase {
Map<String, HighlightField> highlightFields = newHashMap();
for (SearchContextHighlight.Field field : context.highlight().fields()) {
String fieldName = field.field();
FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(field.field());
if (mapper != null) {
fieldName = mapper.names().indexName();
if (mapper == null) {
throw new SearchException(context.shardTarget(), "No mapping found for [" + field.field() + "]");
}
FastVectorHighlighter highlighter = buildHighlighter(field);
FastVectorHighlighter highlighter = buildHighlighter(context, mapper, field);
FieldQuery fieldQuery = buildFieldQuery(highlighter, context.query(), reader, field);
String[] fragments;
try {
// a HACK to make highlighter do highlighting, even though its using the single frag list builder
int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, fieldName, field.fragmentCharSize(), numberOfFragments);
fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments);
} catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
}
@ -91,18 +93,30 @@ public class HighlightPhase implements SearchHitPhase {
return new CustomFieldQuery(query, highlighter);
}
private FastVectorHighlighter buildHighlighter(SearchContextHighlight.Field field) {
private FastVectorHighlighter buildHighlighter(SearchContext searchContext, FieldMapper fieldMapper, SearchContextHighlight.Field field) {
FragListBuilder fragListBuilder;
FragmentsBuilder fragmentsBuilder;
if (field.numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder();
if (fieldMapper.stored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
}
} else {
fragListBuilder = new SimpleFragListBuilder();
if (field.scoreOrdered()) {
if (fieldMapper.stored()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
} else {
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
}
} else {
if (fieldMapper.stored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
}
}
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.highlight.vectorhighlight;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import java.io.IOException;
import java.util.List;
/**
* @author kimchy (shay.banon)
*/
public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {
private final FieldMapper mapper;
private final SearchContext searchContext;
public SourceScoreOrderFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
String[] preTags, String[] postTags) {
super(preTags, postTags);
this.mapper = mapper;
this.searchContext = searchContext;
}
@Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
// we know its low level reader, and matching docId, since that's how we call the highlighter with
SearchLookup lookup = searchContext.lookup();
lookup.setNextReader(reader);
lookup.setNextDocId(docId);
List<Object> values = lookup.source().getValues(mapper.names().fullName());
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED);
}
return fields;
}
}

View File

@ -0,0 +1,67 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.highlight.vectorhighlight;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import java.io.IOException;
import java.util.List;
/**
* @author kimchy (shay.banon)
*/
public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {
private final FieldMapper mapper;
private final SearchContext searchContext;
public SourceSimpleFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
String[] preTags, String[] postTags) {
super(preTags, postTags);
this.mapper = mapper;
this.searchContext = searchContext;
}
public static Field[] EMPTY_FIELDS = new Field[0];
@Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
// we know its low level reader, and matching docId, since that's how we call the highlighter with
SearchLookup lookup = searchContext.lookup();
lookup.setNextReader(reader);
lookup.setNextDocId(docId);
List<Object> values = lookup.source().getValues(mapper.names().fullName());
if (values.isEmpty()) {
return EMPTY_FIELDS;
}
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED);
}
return fields;
}
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.elasticsearch.ElasticSearchParseException;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.compress.lzf.LZFDecoder;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import org.elasticsearch.common.io.stream.CachedStreamInput;
@ -34,12 +35,15 @@ import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.SourceFieldSelector;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/**
* @author kimchy (shay.banon)
*/
// TODO: If we are processing it in the per hit fetch phase, we cna initialize it with a source if it was loaded..
public class SourceLookup implements Map {
private IndexReader reader;
@ -99,6 +103,54 @@ public class SourceLookup implements Map {
this.source = null;
}
private final static Pattern dotPattern = Pattern.compile("\\.");
/**
* Returns the values associated with the path. Those are "low" level values, and it can
* handle path expression where an array/list is navigated within.
*/
public List<Object> getValues(String path) {
List<Object> values = Lists.newArrayList();
String[] pathElements = dotPattern.split(path);
getValues(values, loadSourceIfNeeded(), pathElements, 0);
return values;
}
@SuppressWarnings({"unchecked"})
private void getValues(List<Object> values, Map<String, Object> part, String[] pathElements, int index) {
if (index == pathElements.length) {
return;
}
String currentPath = pathElements[index];
Object currentValue = part.get(currentPath);
if (currentValue == null) {
return;
}
if (currentValue instanceof Map) {
getValues(values, (Map<String, Object>) currentValue, pathElements, index + 1);
} else if (currentValue instanceof List) {
getValues(values, (List<Object>) currentValue, pathElements, index + 1);
} else {
values.add(currentValue);
}
}
@SuppressWarnings({"unchecked"})
private void getValues(List<Object> values, List<Object> part, String[] pathElements, int index) {
for (Object value : part) {
if (value == null) {
continue;
}
if (value instanceof Map) {
getValues(values, (Map<String, Object>) value, pathElements, index);
} else if (value instanceof List) {
getValues(values, (List<Object>) value, pathElements, index);
} else {
values.add(value);
}
}
}
@Override public Object get(Object key) {
return loadSourceIfNeeded().get(key);
}

View File

@ -0,0 +1,104 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.highlight;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
* @author kimchy (shay.banon)
*/
public class SourceFieldHighlightingTests extends AbstractNodesTests {
private Client client;
@BeforeClass public void createNodes() throws Exception {
startNode("node1");
client = getClient();
}
@AfterClass public void closeNodes() {
client.close();
closeAllNodes();
}
protected Client getClient() {
return client("node1");
}
@Test public void testSourceLookupHighlighting() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();
} catch (Exception e) {
// ignore
}
client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
// we don't store title, now lets see if it works...
.startObject("title").field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets").endObject()
.startObject("attachments").startObject("properties").startObject("body").field("type", "string").field("term_vector", "with_positions_offsets").endObject().endObject().endObject()
.endObject().endObject().endObject())
.execute().actionGet();
for (int i = 0; i < 5; i++) {
client.prepareIndex("test", "type1", Integer.toString(i))
.setSource(XContentFactory.jsonBuilder().startObject()
.field("title", "This is a test on the highlighting bug present in elasticsearch")
.startArray("attachments").startObject().field("body", "attachment 1").endObject().startObject().field("body", "attachment 2").endObject().endArray()
.endObject())
.setRefresh(true).execute().actionGet();
}
SearchResponse search = client.prepareSearch()
.setQuery(fieldQuery("title", "bug"))
.addHighlightedField("title", -1, 0)
.execute().actionGet();
assertThat(search.hits().totalHits(), equalTo(5l));
for (SearchHit hit : search.hits()) {
assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
}
search = client.prepareSearch()
.setQuery(fieldQuery("attachments.body", "attachment"))
.addHighlightedField("attachments.body", -1, 0)
.execute().actionGet();
assertThat(search.hits().totalHits(), equalTo(5l));
for (SearchHit hit : search.hits()) {
assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2"));
}
}
}