From 216b2ab91256ed6bc6ec513249e16b30716095e4 Mon Sep 17 00:00:00 2001 From: kimchy Date: Sun, 12 Dec 2010 04:40:25 +0200 Subject: [PATCH] Highlighting: Automatically use the field values extracted from _source if not stored explicitly in the mapping, closes #561. --- .../search/highlight/HighlightPhase.java | 32 ++++-- .../SourceScoreOrderFragmentsBuilder.java | 61 ++++++++++ .../SourceSimpleFragmentsBuilder.java | 67 +++++++++++ .../search/lookup/SourceLookup.java | 52 +++++++++ .../SourceFieldHighlightingTests.java | 104 ++++++++++++++++++ 5 files changed, 307 insertions(+), 9 deletions(-) create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java create mode 100644 modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/highlight/SourceFieldHighlightingTests.java diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index 2218f7d2f51..efd65f22b91 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -27,9 +27,12 @@ import org.elasticsearch.common.collect.ImmutableMap; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.Uid; +import org.elasticsearch.search.SearchException; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; import org.elasticsearch.search.fetch.SearchHitPhase; +import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragmentsBuilder; +import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder; import org.elasticsearch.search.internal.InternalSearchHit; import org.elasticsearch.search.internal.SearchContext; @@ -57,20 +60,19 @@ public class HighlightPhase implements SearchHitPhase { Map highlightFields = newHashMap(); for (SearchContextHighlight.Field field : context.highlight().fields()) { - String fieldName = field.field(); FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(field.field()); - if (mapper != null) { - fieldName = mapper.names().indexName(); + if (mapper == null) { + throw new SearchException(context.shardTarget(), "No mapping found for [" + field.field() + "]"); } - FastVectorHighlighter highlighter = buildHighlighter(field); + FastVectorHighlighter highlighter = buildHighlighter(context, mapper, field); FieldQuery fieldQuery = buildFieldQuery(highlighter, context.query(), reader, field); String[] fragments; try { // a HACK to make highlighter do highlighting, even though its using the single frag list builder int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments(); - fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, fieldName, field.fragmentCharSize(), numberOfFragments); + fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments); } catch (IOException e) { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e); } @@ -91,18 +93,30 @@ public class HighlightPhase implements SearchHitPhase { return new CustomFieldQuery(query, highlighter); } - private FastVectorHighlighter buildHighlighter(SearchContextHighlight.Field field) { + private FastVectorHighlighter buildHighlighter(SearchContext searchContext, FieldMapper fieldMapper, SearchContextHighlight.Field field) { FragListBuilder fragListBuilder; FragmentsBuilder fragmentsBuilder; if (field.numberOfFragments() == 0) { fragListBuilder = new SingleFragListBuilder(); - fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags()); + if (fieldMapper.stored()) { + fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags()); + } else { + fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags()); + } } else { fragListBuilder = new SimpleFragListBuilder(); if (field.scoreOrdered()) { - fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags()); + if (fieldMapper.stored()) { + fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags()); + } else { + fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags()); + } } else { - fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags()); + if (fieldMapper.stored()) { + fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags()); + } else { + fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags()); + } } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java new file mode 100644 index 00000000000..72b819bd5b8 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java @@ -0,0 +1,61 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.highlight.vectorhighlight; + +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder { + + private final FieldMapper mapper; + + private final SearchContext searchContext; + + public SourceScoreOrderFragmentsBuilder(FieldMapper mapper, SearchContext searchContext, + String[] preTags, String[] postTags) { + super(preTags, postTags); + this.mapper = mapper; + this.searchContext = searchContext; + } + + @Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { + // we know its low level reader, and matching docId, since that's how we call the highlighter with + SearchLookup lookup = searchContext.lookup(); + lookup.setNextReader(reader); + lookup.setNextDocId(docId); + + List values = lookup.source().getValues(mapper.names().fullName()); + Field[] fields = new Field[values.size()]; + for (int i = 0; i < values.size(); i++) { + fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED); + } + return fields; + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java new file mode 100644 index 00000000000..2572d3e9747 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java @@ -0,0 +1,67 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.highlight.vectorhighlight; + +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder { + + private final FieldMapper mapper; + + private final SearchContext searchContext; + + public SourceSimpleFragmentsBuilder(FieldMapper mapper, SearchContext searchContext, + String[] preTags, String[] postTags) { + super(preTags, postTags); + this.mapper = mapper; + this.searchContext = searchContext; + } + + public static Field[] EMPTY_FIELDS = new Field[0]; + + @Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { + // we know its low level reader, and matching docId, since that's how we call the highlighter with + SearchLookup lookup = searchContext.lookup(); + lookup.setNextReader(reader); + lookup.setNextDocId(docId); + + List values = lookup.source().getValues(mapper.names().fullName()); + if (values.isEmpty()) { + return EMPTY_FIELDS; + } + Field[] fields = new Field[values.size()]; + for (int i = 0; i < values.size(); i++) { + fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED); + } + return fields; + } + +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index fe231abb2a4..6d61ba811ab 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.elasticsearch.ElasticSearchParseException; +import org.elasticsearch.common.collect.Lists; import org.elasticsearch.common.compress.lzf.LZFDecoder; import org.elasticsearch.common.io.stream.BytesStreamInput; import org.elasticsearch.common.io.stream.CachedStreamInput; @@ -34,12 +35,15 @@ import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.mapper.SourceFieldSelector; import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.Set; +import java.util.regex.Pattern; /** * @author kimchy (shay.banon) */ +// TODO: If we are processing it in the per hit fetch phase, we cna initialize it with a source if it was loaded.. public class SourceLookup implements Map { private IndexReader reader; @@ -99,6 +103,54 @@ public class SourceLookup implements Map { this.source = null; } + private final static Pattern dotPattern = Pattern.compile("\\."); + + /** + * Returns the values associated with the path. Those are "low" level values, and it can + * handle path expression where an array/list is navigated within. + */ + public List getValues(String path) { + List values = Lists.newArrayList(); + String[] pathElements = dotPattern.split(path); + getValues(values, loadSourceIfNeeded(), pathElements, 0); + return values; + } + + @SuppressWarnings({"unchecked"}) + private void getValues(List values, Map part, String[] pathElements, int index) { + if (index == pathElements.length) { + return; + } + String currentPath = pathElements[index]; + Object currentValue = part.get(currentPath); + if (currentValue == null) { + return; + } + if (currentValue instanceof Map) { + getValues(values, (Map) currentValue, pathElements, index + 1); + } else if (currentValue instanceof List) { + getValues(values, (List) currentValue, pathElements, index + 1); + } else { + values.add(currentValue); + } + } + + @SuppressWarnings({"unchecked"}) + private void getValues(List values, List part, String[] pathElements, int index) { + for (Object value : part) { + if (value == null) { + continue; + } + if (value instanceof Map) { + getValues(values, (Map) value, pathElements, index); + } else if (value instanceof List) { + getValues(values, (List) value, pathElements, index); + } else { + values.add(value); + } + } + } + @Override public Object get(Object key) { return loadSourceIfNeeded().get(key); } diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/highlight/SourceFieldHighlightingTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/highlight/SourceFieldHighlightingTests.java new file mode 100644 index 00000000000..ff97217017c --- /dev/null +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/highlight/SourceFieldHighlightingTests.java @@ -0,0 +1,104 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.test.integration.search.highlight; + +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.integration.AbstractNodesTests; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import static org.elasticsearch.common.xcontent.XContentFactory.*; +import static org.elasticsearch.index.query.xcontent.QueryBuilders.*; +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +/** + * @author kimchy (shay.banon) + */ +public class SourceFieldHighlightingTests extends AbstractNodesTests { + + private Client client; + + @BeforeClass public void createNodes() throws Exception { + startNode("node1"); + client = getClient(); + } + + @AfterClass public void closeNodes() { + client.close(); + closeAllNodes(); + } + + protected Client getClient() { + return client("node1"); + } + + @Test public void testSourceLookupHighlighting() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + + client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2)) + .addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") + // we don't store title, now lets see if it works... + .startObject("title").field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets").endObject() + .startObject("attachments").startObject("properties").startObject("body").field("type", "string").field("term_vector", "with_positions_offsets").endObject().endObject().endObject() + .endObject().endObject().endObject()) + .execute().actionGet(); + + for (int i = 0; i < 5; i++) { + client.prepareIndex("test", "type1", Integer.toString(i)) + .setSource(XContentFactory.jsonBuilder().startObject() + .field("title", "This is a test on the highlighting bug present in elasticsearch") + .startArray("attachments").startObject().field("body", "attachment 1").endObject().startObject().field("body", "attachment 2").endObject().endArray() + .endObject()) + .setRefresh(true).execute().actionGet(); + } + + SearchResponse search = client.prepareSearch() + .setQuery(fieldQuery("title", "bug")) + .addHighlightedField("title", -1, 0) + .execute().actionGet(); + + assertThat(search.hits().totalHits(), equalTo(5l)); + + for (SearchHit hit : search.hits()) { + assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting bug present in elasticsearch")); + } + + search = client.prepareSearch() + .setQuery(fieldQuery("attachments.body", "attachment")) + .addHighlightedField("attachments.body", -1, 0) + .execute().actionGet(); + + assertThat(search.hits().totalHits(), equalTo(5l)); + + for (SearchHit hit : search.hits()) { + assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("attachment 1 attachment 2")); + } + } +}