From 9a13763315e8da781bf7f7b6e12c8819f9271513 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 20 Sep 2011 14:33:02 +0300 Subject: [PATCH] Improve source based fields loading when searching, closes #1347. --- .../xcontent/support/XContentMapValues.java | 85 ++++++++++++++++ .../search/fetch/FetchPhase.java | 85 +++++++++------- .../search/fetch/FieldsParseElement.java | 34 +------ .../search/highlight/HighlightPhase.java | 2 +- .../SourceScoreOrderFragmentsBuilder.java | 2 +- .../SourceSimpleFragmentsBuilder.java | 2 +- .../search/lookup/SourceLookup.java | 44 +-------- .../support/XContentMapValuesTests.java | 99 +++++++++++++++++++ .../search/fields/SearchFieldsTests.java | 2 - 9 files changed, 247 insertions(+), 108 deletions(-) create mode 100644 modules/elasticsearch/src/test/java/org/elasticsearch/common/xcontent/support/XContentMapValuesTests.java diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java b/modules/elasticsearch/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java index e565104972b..26c80cd0a93 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java @@ -19,8 +19,11 @@ package org.elasticsearch.common.xcontent.support; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.collect.Lists; import org.elasticsearch.common.unit.TimeValue; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -29,6 +32,88 @@ import java.util.Map; */ public class XContentMapValues { + /** + * Extracts raw values (string, int, and so on) based on the path provided returning all of them + * as a single list. + */ + public static List extractRawValues(String path, Map map) { + List values = Lists.newArrayList(); + String[] pathElements = Strings.splitStringToArray(path, '.'); + if (pathElements.length == 0) { + return values; + } + extractRawValues(values, map, pathElements, 0); + return values; + } + + @SuppressWarnings({"unchecked"}) + private static void extractRawValues(List values, Map part, String[] pathElements, int index) { + if (index == pathElements.length) { + return; + } + String currentPath = pathElements[index]; + Object currentValue = part.get(currentPath); + if (currentValue == null) { + return; + } + if (currentValue instanceof Map) { + extractRawValues(values, (Map) currentValue, pathElements, index + 1); + } else if (currentValue instanceof List) { + extractRawValues(values, (List) currentValue, pathElements, index + 1); + } else { + values.add(currentValue); + } + } + + @SuppressWarnings({"unchecked"}) + private static void extractRawValues(List values, List part, String[] pathElements, int index) { + for (Object value : part) { + if (value == null) { + continue; + } + if (value instanceof Map) { + extractRawValues(values, (Map) value, pathElements, index); + } else if (value instanceof List) { + extractRawValues(values, (List) value, pathElements, index); + } else { + values.add(value); + } + } + } + + public static Object extractValue(String path, Map map) { + String[] pathElements = Strings.splitStringToArray(path, '.'); + if (pathElements.length == 0) { + return null; + } + return extractValue(pathElements, 0, map); + } + + @SuppressWarnings({"unchecked"}) private static Object extractValue(String[] pathElements, int index, Object currentValue) { + if (index == pathElements.length) { + return currentValue; + } + if (currentValue == null) { + return null; + } + if (currentValue instanceof Map) { + Map map = (Map) currentValue; + return extractValue(pathElements, index + 1, map.get(pathElements[index])); + } + if (currentValue instanceof List) { + List valueList = (List) currentValue; + List newList = new ArrayList(valueList.size()); + for (Object o : valueList) { + Object listValue = extractValue(pathElements, index, o); + if (listValue != null) { + newList.add(listValue); + } + } + return newList; + } + return null; + } + public static boolean isObject(Object node) { return node instanceof Map; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index ecd22b727e3..3f54f1c04e5 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.elasticsearch.common.collect.ImmutableMap; +import org.elasticsearch.common.collect.Lists; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.document.ResetFieldSelector; import org.elasticsearch.index.Index; @@ -81,7 +82,35 @@ public class FetchPhase implements SearchPhase { } public void execute(SearchContext context) { - ResetFieldSelector fieldSelector = buildFieldSelectors(context); + ResetFieldSelector fieldSelector; + List extractFieldNames = null; + if (context.hasScriptFields() && !context.hasFieldNames()) { + // we ask for script fields, and no field names, don't load the source + fieldSelector = UidFieldSelector.INSTANCE; + } else if (!context.hasFieldNames()) { + fieldSelector = new UidAndSourceFieldSelector(); + } else if (context.fieldNames().isEmpty()) { + fieldSelector = UidFieldSelector.INSTANCE; + } else if (context.fieldNames().get(0).equals("*")) { + // asked for all stored fields, just return null so all of them will be loaded + // don't load the source field in this case, makes little sense to get it with all stored fields + fieldSelector = AllButSourceFieldSelector.INSTANCE; + } else { + FieldMappersFieldSelector fieldSelectorMapper = new FieldMappersFieldSelector(); + for (String fieldName : context.fieldNames()) { + FieldMappers x = context.mapperService().smartNameFieldMappers(fieldName); + if (x != null && x.mapper().stored()) { + fieldSelectorMapper.add(x); + } else { + if (extractFieldNames == null) { + extractFieldNames = Lists.newArrayList(); + } + extractFieldNames.add(fieldName); + } + } + fieldSelectorMapper.add(UidFieldMapper.NAME); + fieldSelector = fieldSelectorMapper; + } InternalSearchHit[] hits = new InternalSearchHit[context.docIdsToLoadSize()]; for (int index = 0; index < context.docIdsToLoadSize(); index++) { @@ -148,6 +177,28 @@ public class FetchPhase implements SearchPhase { int readerIndex = context.searcher().readerIndex(docId); IndexReader subReader = context.searcher().subReaders()[readerIndex]; int subDoc = docId - context.searcher().docStarts()[readerIndex]; + + // go over and extract fields that are not mapped / stored + context.lookup().setNextReader(subReader); + context.lookup().setNextDocId(docId); + if (extractFieldNames != null) { + for (String extractFieldName : extractFieldNames) { + Object value = context.lookup().source().extractValue(extractFieldName); + if (value != null) { + if (searchHit.fieldsOrNull() == null) { + searchHit.fields(new HashMap(2)); + } + + SearchHitField hitField = searchHit.fields().get(extractFieldName); + if (hitField == null) { + hitField = new InternalSearchHitField(extractFieldName, new ArrayList(2)); + searchHit.fields().put(extractFieldName, hitField); + } + hitField.values().add(value); + } + } + } + for (SearchHitPhase hitPhase : hitPhases) { SearchHitPhase.HitContext hitContext = new SearchHitPhase.HitContext(); if (hitPhase.executionNeeded(context)) { @@ -189,36 +240,4 @@ public class FetchPhase implements SearchPhase { throw new FetchPhaseExecutionException(context, "Failed to fetch doc id [" + docId + "]", e); } } - - private ResetFieldSelector buildFieldSelectors(SearchContext context) { - if (context.hasScriptFields() && !context.hasFieldNames()) { - // we ask for script fields, and no field names, don't load the source - return UidFieldSelector.INSTANCE; - } - - if (!context.hasFieldNames()) { - return new UidAndSourceFieldSelector(); - } - - if (context.fieldNames().isEmpty()) { - return UidFieldSelector.INSTANCE; - } - - // asked for all stored fields, just return null so all of them will be loaded - // don't load the source field in this case, makes little sense to get it with all stored fields - if (context.fieldNames().get(0).equals("*")) { - return AllButSourceFieldSelector.INSTANCE; - } - - FieldMappersFieldSelector fieldSelector = new FieldMappersFieldSelector(); - for (String fieldName : context.fieldNames()) { - FieldMappers x = context.mapperService().smartNameFieldMappers(fieldName); - if (x == null) { - throw new FetchPhaseExecutionException(context, "No mapping for field [" + fieldName + "] in order to load it"); - } - fieldSelector.add(x); - } - fieldSelector.add(UidFieldMapper.NAME); - return fieldSelector; - } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/fetch/FieldsParseElement.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/fetch/FieldsParseElement.java index 2b8edc15370..3dec32bc98b 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/fetch/FieldsParseElement.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/fetch/FieldsParseElement.java @@ -20,7 +20,6 @@ package org.elasticsearch.search.fetch; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.script.SearchScript; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.fetch.script.ScriptFieldsContext; @@ -42,21 +41,8 @@ public class FieldsParseElement implements SearchParseElement { SearchScript searchScript = context.scriptService().search(context.lookup(), "mvel", name, null); context.scriptFields().add(new ScriptFieldsContext.ScriptField(name, searchScript, true)); } else { - if ("*".equals(name)) { - added = true; - context.fieldNames().add("*"); - } else { - FieldMapper fieldMapper = context.mapperService().smartNameFieldMapper(name); - if (fieldMapper != null) { - if (fieldMapper.stored()) { - added = true; - context.fieldNames().add(name); - } else { - SearchScript searchScript = context.scriptService().search(context.lookup(), "mvel", "_source." + fieldMapper.names().fullName(), null); - context.scriptFields().add(new ScriptFieldsContext.ScriptField(name, searchScript, true)); - } - } - } + added = true; + context.fieldNames().add(name); } } if (!added) { @@ -69,21 +55,7 @@ public class FieldsParseElement implements SearchParseElement { SearchScript searchScript = context.scriptService().search(context.lookup(), "mvel", name, null); context.scriptFields().add(new ScriptFieldsContext.ScriptField(name, searchScript, true)); } else { - if ("*".equals(name)) { - context.fieldNames().add("*"); - } else { - FieldMapper fieldMapper = context.mapperService().smartNameFieldMapper(name); - if (fieldMapper != null) { - if (fieldMapper.stored()) { - context.fieldNames().add(name); - } else { - SearchScript searchScript = context.scriptService().search(context.lookup(), "mvel", "_source." + fieldMapper.names().fullName(), null); - context.scriptFields().add(new ScriptFieldsContext.ScriptField(name, searchScript, true)); - } - } else { - context.emptyFieldNames(); // don't load anything if we can't find mapping - } - } + context.fieldNames().add(name); } } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index 0db8088edb3..5f80520aadf 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -141,7 +141,7 @@ public class HighlightPhase implements SearchHitPhase { SearchLookup lookup = context.lookup(); lookup.setNextReader(hitContext.reader()); lookup.setNextDocId(hitContext.docId()); - textsToHighlight = lookup.source().getValues(mapper.names().fullName()); + textsToHighlight = lookup.source().extractRawValues(mapper.names().fullName()); } // a HACK to make highlighter do highlighting, even though its using the single frag list builder diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java index 72b819bd5b8..65cbb22312f 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java @@ -51,7 +51,7 @@ public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder lookup.setNextReader(reader); lookup.setNextDocId(docId); - List values = lookup.source().getValues(mapper.names().fullName()); + List values = lookup.source().extractRawValues(mapper.names().fullName()); Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java index 76b5534676a..819883ab37c 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java @@ -53,7 +53,7 @@ public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder { lookup.setNextReader(reader); lookup.setNextDocId(docId); - List values = lookup.source().getValues(mapper.names().fullName()); + List values = lookup.source().extractRawValues(mapper.names().fullName()); if (values.isEmpty()) { return EMPTY_FIELDS; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index 03b269e17ce..feb309b9f2f 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -23,7 +23,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.elasticsearch.ElasticSearchParseException; -import org.elasticsearch.common.collect.Lists; import org.elasticsearch.common.compress.lzf.LZF; import org.elasticsearch.common.io.stream.BytesStreamInput; import org.elasticsearch.common.io.stream.CachedStreamInput; @@ -31,6 +30,7 @@ import org.elasticsearch.common.io.stream.LZFStreamInput; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.mapper.internal.SourceFieldMapper; import org.elasticsearch.index.mapper.internal.SourceFieldSelector; @@ -126,46 +126,12 @@ public class SourceLookup implements Map { * Returns the values associated with the path. Those are "low" level values, and it can * handle path expression where an array/list is navigated within. */ - public List getValues(String path) { - List values = Lists.newArrayList(); - String[] pathElements = dotPattern.split(path); - getValues(values, loadSourceIfNeeded(), pathElements, 0); - return values; + public List extractRawValues(String path) { + return XContentMapValues.extractRawValues(path, loadSourceIfNeeded()); } - @SuppressWarnings({"unchecked"}) - private void getValues(List values, Map part, String[] pathElements, int index) { - if (index == pathElements.length) { - return; - } - String currentPath = pathElements[index]; - Object currentValue = part.get(currentPath); - if (currentValue == null) { - return; - } - if (currentValue instanceof Map) { - getValues(values, (Map) currentValue, pathElements, index + 1); - } else if (currentValue instanceof List) { - getValues(values, (List) currentValue, pathElements, index + 1); - } else { - values.add(currentValue); - } - } - - @SuppressWarnings({"unchecked"}) - private void getValues(List values, List part, String[] pathElements, int index) { - for (Object value : part) { - if (value == null) { - continue; - } - if (value instanceof Map) { - getValues(values, (Map) value, pathElements, index); - } else if (value instanceof List) { - getValues(values, (List) value, pathElements, index); - } else { - values.add(value); - } - } + public Object extractValue(String path) { + return XContentMapValues.extractValue(path, loadSourceIfNeeded()); } @Override public Object get(Object key) { diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/common/xcontent/support/XContentMapValuesTests.java b/modules/elasticsearch/src/test/java/org/elasticsearch/common/xcontent/support/XContentMapValuesTests.java new file mode 100644 index 00000000000..4ac2b06bf77 --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/common/xcontent/support/XContentMapValuesTests.java @@ -0,0 +1,99 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.xcontent.support; + +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; +import org.testng.annotations.Test; + +import java.util.List; +import java.util.Map; + +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +/** + */ +@Test +public class XContentMapValuesTests { + + @SuppressWarnings({"unchecked"}) @Test public void testExtractValue() throws Exception { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject() + .field("test", "value") + .endObject(); + + Map map = XContentFactory.xContent(XContentType.JSON).createParser(builder.string()).mapAndClose(); + assertThat(XContentMapValues.extractValue("test", map).toString(), equalTo("value")); + assertThat(XContentMapValues.extractValue("test.me", map), nullValue()); + assertThat(XContentMapValues.extractValue("something.else.2", map), nullValue()); + + builder = XContentFactory.jsonBuilder().startObject() + .startObject("path1").startObject("path2").field("test", "value").endObject().endObject() + .endObject(); + + map = XContentFactory.xContent(XContentType.JSON).createParser(builder.string()).mapAndClose(); + assertThat(XContentMapValues.extractValue("path1.path2.test", map).toString(), equalTo("value")); + assertThat(XContentMapValues.extractValue("path1.path2.test_me", map), nullValue()); + assertThat(XContentMapValues.extractValue("path1.non_path2.test", map), nullValue()); + + Object extValue = XContentMapValues.extractValue("path1.path2", map); + assertThat(extValue, instanceOf(Map.class)); + Map extMapValue = (Map) extValue; + assertThat(extMapValue, hasEntry("test", (Object) "value")); + + extValue = XContentMapValues.extractValue("path1", map); + assertThat(extValue, instanceOf(Map.class)); + extMapValue = (Map) extValue; + assertThat(extMapValue.containsKey("path2"), equalTo(true)); + + // lists + builder = XContentFactory.jsonBuilder().startObject() + .startObject("path1").field("test", "value1", "value2").endObject() + .endObject(); + + map = XContentFactory.xContent(XContentType.JSON).createParser(builder.string()).mapAndClose(); + + extValue = XContentMapValues.extractValue("path1.test", map); + assertThat(extValue, instanceOf(List.class)); + + List extListValue = (List) extValue; + assertThat(extListValue.size(), equalTo(2)); + + builder = XContentFactory.jsonBuilder().startObject() + .startObject("path1") + .startArray("path2") + .startObject().field("test", "value1").endObject() + .startObject().field("test", "value2").endObject() + .endArray() + .endObject() + .endObject(); + + map = XContentFactory.xContent(XContentType.JSON).createParser(builder.string()).mapAndClose(); + + extValue = XContentMapValues.extractValue("path1.path2.test", map); + assertThat(extValue, instanceOf(List.class)); + + extListValue = (List) extValue; + assertThat(extListValue.size(), equalTo(2)); + assertThat(extListValue.get(0).toString(), equalTo("value1")); + assertThat(extListValue.get(1).toString(), equalTo("value2")); + } +} diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/fields/SearchFieldsTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/fields/SearchFieldsTests.java index 0f3890a0db1..2357496b4b4 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/fields/SearchFieldsTests.java +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/fields/SearchFieldsTests.java @@ -66,8 +66,6 @@ public class SearchFieldsTests extends AbstractNodesTests { client.admin().indices().preparePutMapping().setType("type1").setSource(mapping).execute().actionGet(); - Thread.sleep(100); // sleep a bit here..., so hte mappings get applied - client.prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject() .field("field1", "value1") .field("field2", "value2")