Improve source based fields loading when searching, closes #1347.

This commit is contained in:
Shay Banon 2011-09-20 14:33:02 +03:00
parent 29d0bfdaa9
commit 9a13763315
9 changed files with 247 additions and 108 deletions

View File

@ -19,8 +19,11 @@
package org.elasticsearch.common.xcontent.support;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.unit.TimeValue;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@ -29,6 +32,88 @@ import java.util.Map;
*/
public class XContentMapValues {
/**
* Extracts raw values (string, int, and so on) based on the path provided returning all of them
* as a single list.
*/
public static List<Object> extractRawValues(String path, Map<String, Object> map) {
List<Object> values = Lists.newArrayList();
String[] pathElements = Strings.splitStringToArray(path, '.');
if (pathElements.length == 0) {
return values;
}
extractRawValues(values, map, pathElements, 0);
return values;
}
@SuppressWarnings({"unchecked"})
private static void extractRawValues(List values, Map<String, Object> part, String[] pathElements, int index) {
if (index == pathElements.length) {
return;
}
String currentPath = pathElements[index];
Object currentValue = part.get(currentPath);
if (currentValue == null) {
return;
}
if (currentValue instanceof Map) {
extractRawValues(values, (Map<String, Object>) currentValue, pathElements, index + 1);
} else if (currentValue instanceof List) {
extractRawValues(values, (List) currentValue, pathElements, index + 1);
} else {
values.add(currentValue);
}
}
@SuppressWarnings({"unchecked"})
private static void extractRawValues(List values, List<Object> part, String[] pathElements, int index) {
for (Object value : part) {
if (value == null) {
continue;
}
if (value instanceof Map) {
extractRawValues(values, (Map<String, Object>) value, pathElements, index);
} else if (value instanceof List) {
extractRawValues(values, (List) value, pathElements, index);
} else {
values.add(value);
}
}
}
public static Object extractValue(String path, Map<String, Object> map) {
String[] pathElements = Strings.splitStringToArray(path, '.');
if (pathElements.length == 0) {
return null;
}
return extractValue(pathElements, 0, map);
}
@SuppressWarnings({"unchecked"}) private static Object extractValue(String[] pathElements, int index, Object currentValue) {
if (index == pathElements.length) {
return currentValue;
}
if (currentValue == null) {
return null;
}
if (currentValue instanceof Map) {
Map map = (Map) currentValue;
return extractValue(pathElements, index + 1, map.get(pathElements[index]));
}
if (currentValue instanceof List) {
List valueList = (List) currentValue;
List newList = new ArrayList(valueList.size());
for (Object o : valueList) {
Object listValue = extractValue(pathElements, index, o);
if (listValue != null) {
newList.add(listValue);
}
}
return newList;
}
return null;
}
public static boolean isObject(Object node) {
return node instanceof Map;
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.document.ResetFieldSelector;
import org.elasticsearch.index.Index;
@ -81,7 +82,35 @@ public class FetchPhase implements SearchPhase {
}
public void execute(SearchContext context) {
ResetFieldSelector fieldSelector = buildFieldSelectors(context);
ResetFieldSelector fieldSelector;
List<String> extractFieldNames = null;
if (context.hasScriptFields() && !context.hasFieldNames()) {
// we ask for script fields, and no field names, don't load the source
fieldSelector = UidFieldSelector.INSTANCE;
} else if (!context.hasFieldNames()) {
fieldSelector = new UidAndSourceFieldSelector();
} else if (context.fieldNames().isEmpty()) {
fieldSelector = UidFieldSelector.INSTANCE;
} else if (context.fieldNames().get(0).equals("*")) {
// asked for all stored fields, just return null so all of them will be loaded
// don't load the source field in this case, makes little sense to get it with all stored fields
fieldSelector = AllButSourceFieldSelector.INSTANCE;
} else {
FieldMappersFieldSelector fieldSelectorMapper = new FieldMappersFieldSelector();
for (String fieldName : context.fieldNames()) {
FieldMappers x = context.mapperService().smartNameFieldMappers(fieldName);
if (x != null && x.mapper().stored()) {
fieldSelectorMapper.add(x);
} else {
if (extractFieldNames == null) {
extractFieldNames = Lists.newArrayList();
}
extractFieldNames.add(fieldName);
}
}
fieldSelectorMapper.add(UidFieldMapper.NAME);
fieldSelector = fieldSelectorMapper;
}
InternalSearchHit[] hits = new InternalSearchHit[context.docIdsToLoadSize()];
for (int index = 0; index < context.docIdsToLoadSize(); index++) {
@ -148,6 +177,28 @@ public class FetchPhase implements SearchPhase {
int readerIndex = context.searcher().readerIndex(docId);
IndexReader subReader = context.searcher().subReaders()[readerIndex];
int subDoc = docId - context.searcher().docStarts()[readerIndex];
// go over and extract fields that are not mapped / stored
context.lookup().setNextReader(subReader);
context.lookup().setNextDocId(docId);
if (extractFieldNames != null) {
for (String extractFieldName : extractFieldNames) {
Object value = context.lookup().source().extractValue(extractFieldName);
if (value != null) {
if (searchHit.fieldsOrNull() == null) {
searchHit.fields(new HashMap<String, SearchHitField>(2));
}
SearchHitField hitField = searchHit.fields().get(extractFieldName);
if (hitField == null) {
hitField = new InternalSearchHitField(extractFieldName, new ArrayList<Object>(2));
searchHit.fields().put(extractFieldName, hitField);
}
hitField.values().add(value);
}
}
}
for (SearchHitPhase hitPhase : hitPhases) {
SearchHitPhase.HitContext hitContext = new SearchHitPhase.HitContext();
if (hitPhase.executionNeeded(context)) {
@ -189,36 +240,4 @@ public class FetchPhase implements SearchPhase {
throw new FetchPhaseExecutionException(context, "Failed to fetch doc id [" + docId + "]", e);
}
}
private ResetFieldSelector buildFieldSelectors(SearchContext context) {
if (context.hasScriptFields() && !context.hasFieldNames()) {
// we ask for script fields, and no field names, don't load the source
return UidFieldSelector.INSTANCE;
}
if (!context.hasFieldNames()) {
return new UidAndSourceFieldSelector();
}
if (context.fieldNames().isEmpty()) {
return UidFieldSelector.INSTANCE;
}
// asked for all stored fields, just return null so all of them will be loaded
// don't load the source field in this case, makes little sense to get it with all stored fields
if (context.fieldNames().get(0).equals("*")) {
return AllButSourceFieldSelector.INSTANCE;
}
FieldMappersFieldSelector fieldSelector = new FieldMappersFieldSelector();
for (String fieldName : context.fieldNames()) {
FieldMappers x = context.mapperService().smartNameFieldMappers(fieldName);
if (x == null) {
throw new FetchPhaseExecutionException(context, "No mapping for field [" + fieldName + "] in order to load it");
}
fieldSelector.add(x);
}
fieldSelector.add(UidFieldMapper.NAME);
return fieldSelector;
}
}

View File

@ -20,7 +20,6 @@
package org.elasticsearch.search.fetch;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.script.ScriptFieldsContext;
@ -42,21 +41,8 @@ public class FieldsParseElement implements SearchParseElement {
SearchScript searchScript = context.scriptService().search(context.lookup(), "mvel", name, null);
context.scriptFields().add(new ScriptFieldsContext.ScriptField(name, searchScript, true));
} else {
if ("*".equals(name)) {
added = true;
context.fieldNames().add("*");
} else {
FieldMapper fieldMapper = context.mapperService().smartNameFieldMapper(name);
if (fieldMapper != null) {
if (fieldMapper.stored()) {
added = true;
context.fieldNames().add(name);
} else {
SearchScript searchScript = context.scriptService().search(context.lookup(), "mvel", "_source." + fieldMapper.names().fullName(), null);
context.scriptFields().add(new ScriptFieldsContext.ScriptField(name, searchScript, true));
}
}
}
added = true;
context.fieldNames().add(name);
}
}
if (!added) {
@ -69,21 +55,7 @@ public class FieldsParseElement implements SearchParseElement {
SearchScript searchScript = context.scriptService().search(context.lookup(), "mvel", name, null);
context.scriptFields().add(new ScriptFieldsContext.ScriptField(name, searchScript, true));
} else {
if ("*".equals(name)) {
context.fieldNames().add("*");
} else {
FieldMapper fieldMapper = context.mapperService().smartNameFieldMapper(name);
if (fieldMapper != null) {
if (fieldMapper.stored()) {
context.fieldNames().add(name);
} else {
SearchScript searchScript = context.scriptService().search(context.lookup(), "mvel", "_source." + fieldMapper.names().fullName(), null);
context.scriptFields().add(new ScriptFieldsContext.ScriptField(name, searchScript, true));
}
} else {
context.emptyFieldNames(); // don't load anything if we can't find mapping
}
}
context.fieldNames().add(name);
}
}
}

View File

@ -141,7 +141,7 @@ public class HighlightPhase implements SearchHitPhase {
SearchLookup lookup = context.lookup();
lookup.setNextReader(hitContext.reader());
lookup.setNextDocId(hitContext.docId());
textsToHighlight = lookup.source().getValues(mapper.names().fullName());
textsToHighlight = lookup.source().extractRawValues(mapper.names().fullName());
}
// a HACK to make highlighter do highlighting, even though its using the single frag list builder

View File

@ -51,7 +51,7 @@ public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder
lookup.setNextReader(reader);
lookup.setNextDocId(docId);
List<Object> values = lookup.source().getValues(mapper.names().fullName());
List<Object> values = lookup.source().extractRawValues(mapper.names().fullName());
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED);

View File

@ -53,7 +53,7 @@ public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {
lookup.setNextReader(reader);
lookup.setNextDocId(docId);
List<Object> values = lookup.source().getValues(mapper.names().fullName());
List<Object> values = lookup.source().extractRawValues(mapper.names().fullName());
if (values.isEmpty()) {
return EMPTY_FIELDS;
}

View File

@ -23,7 +23,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.elasticsearch.ElasticSearchParseException;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.compress.lzf.LZF;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import org.elasticsearch.common.io.stream.CachedStreamInput;
@ -31,6 +30,7 @@ import org.elasticsearch.common.io.stream.LZFStreamInput;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
import org.elasticsearch.index.mapper.internal.SourceFieldSelector;
@ -126,46 +126,12 @@ public class SourceLookup implements Map {
* Returns the values associated with the path. Those are "low" level values, and it can
* handle path expression where an array/list is navigated within.
*/
public List<Object> getValues(String path) {
List<Object> values = Lists.newArrayList();
String[] pathElements = dotPattern.split(path);
getValues(values, loadSourceIfNeeded(), pathElements, 0);
return values;
public List<Object> extractRawValues(String path) {
return XContentMapValues.extractRawValues(path, loadSourceIfNeeded());
}
@SuppressWarnings({"unchecked"})
private void getValues(List<Object> values, Map<String, Object> part, String[] pathElements, int index) {
if (index == pathElements.length) {
return;
}
String currentPath = pathElements[index];
Object currentValue = part.get(currentPath);
if (currentValue == null) {
return;
}
if (currentValue instanceof Map) {
getValues(values, (Map<String, Object>) currentValue, pathElements, index + 1);
} else if (currentValue instanceof List) {
getValues(values, (List<Object>) currentValue, pathElements, index + 1);
} else {
values.add(currentValue);
}
}
@SuppressWarnings({"unchecked"})
private void getValues(List<Object> values, List<Object> part, String[] pathElements, int index) {
for (Object value : part) {
if (value == null) {
continue;
}
if (value instanceof Map) {
getValues(values, (Map<String, Object>) value, pathElements, index);
} else if (value instanceof List) {
getValues(values, (List<Object>) value, pathElements, index);
} else {
values.add(value);
}
}
public Object extractValue(String path) {
return XContentMapValues.extractValue(path, loadSourceIfNeeded());
}
@Override public Object get(Object key) {

View File

@ -0,0 +1,99 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.xcontent.support;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.testng.annotations.Test;
import java.util.List;
import java.util.Map;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
*/
@Test
public class XContentMapValuesTests {
@SuppressWarnings({"unchecked"}) @Test public void testExtractValue() throws Exception {
XContentBuilder builder = XContentFactory.jsonBuilder().startObject()
.field("test", "value")
.endObject();
Map<String, Object> map = XContentFactory.xContent(XContentType.JSON).createParser(builder.string()).mapAndClose();
assertThat(XContentMapValues.extractValue("test", map).toString(), equalTo("value"));
assertThat(XContentMapValues.extractValue("test.me", map), nullValue());
assertThat(XContentMapValues.extractValue("something.else.2", map), nullValue());
builder = XContentFactory.jsonBuilder().startObject()
.startObject("path1").startObject("path2").field("test", "value").endObject().endObject()
.endObject();
map = XContentFactory.xContent(XContentType.JSON).createParser(builder.string()).mapAndClose();
assertThat(XContentMapValues.extractValue("path1.path2.test", map).toString(), equalTo("value"));
assertThat(XContentMapValues.extractValue("path1.path2.test_me", map), nullValue());
assertThat(XContentMapValues.extractValue("path1.non_path2.test", map), nullValue());
Object extValue = XContentMapValues.extractValue("path1.path2", map);
assertThat(extValue, instanceOf(Map.class));
Map<String, Object> extMapValue = (Map<String, Object>) extValue;
assertThat(extMapValue, hasEntry("test", (Object) "value"));
extValue = XContentMapValues.extractValue("path1", map);
assertThat(extValue, instanceOf(Map.class));
extMapValue = (Map<String, Object>) extValue;
assertThat(extMapValue.containsKey("path2"), equalTo(true));
// lists
builder = XContentFactory.jsonBuilder().startObject()
.startObject("path1").field("test", "value1", "value2").endObject()
.endObject();
map = XContentFactory.xContent(XContentType.JSON).createParser(builder.string()).mapAndClose();
extValue = XContentMapValues.extractValue("path1.test", map);
assertThat(extValue, instanceOf(List.class));
List extListValue = (List) extValue;
assertThat(extListValue.size(), equalTo(2));
builder = XContentFactory.jsonBuilder().startObject()
.startObject("path1")
.startArray("path2")
.startObject().field("test", "value1").endObject()
.startObject().field("test", "value2").endObject()
.endArray()
.endObject()
.endObject();
map = XContentFactory.xContent(XContentType.JSON).createParser(builder.string()).mapAndClose();
extValue = XContentMapValues.extractValue("path1.path2.test", map);
assertThat(extValue, instanceOf(List.class));
extListValue = (List) extValue;
assertThat(extListValue.size(), equalTo(2));
assertThat(extListValue.get(0).toString(), equalTo("value1"));
assertThat(extListValue.get(1).toString(), equalTo("value2"));
}
}

View File

@ -66,8 +66,6 @@ public class SearchFieldsTests extends AbstractNodesTests {
client.admin().indices().preparePutMapping().setType("type1").setSource(mapping).execute().actionGet();
Thread.sleep(100); // sleep a bit here..., so hte mappings get applied
client.prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject()
.field("field1", "value1")
.field("field2", "value2")