inner_hits: Don't use bitset cache for children filters.

Only parent filters should use the bitset filter cache, to avoid wasting memory.
Also, in the case of object fields, inline the field name into the nested object,
instead of creating an additional (dummy) nested identity.

Closes #10662
Closes #10629
This commit is contained in:
Martijn van Groningen 2015-04-19 23:52:29 +02:00
parent 82ad074dfe
commit 7a6fe809d0
3 changed files with 96 additions and 51 deletions

View File

@ -24,8 +24,9 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter; import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BitDocIdSet;
import org.elasticsearch.ElasticsearchGenerationException; import org.elasticsearch.ElasticsearchGenerationException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
@ -41,21 +42,8 @@ import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.cache.bitset.BitsetFilterCache;
import org.elasticsearch.index.mapper.Mapping.SourceTransform; import org.elasticsearch.index.mapper.Mapping.SourceTransform;
import org.elasticsearch.index.mapper.internal.AllFieldMapper; import org.elasticsearch.index.mapper.internal.*;
import org.elasticsearch.index.mapper.internal.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.mapper.internal.IndexFieldMapper;
import org.elasticsearch.index.mapper.internal.ParentFieldMapper;
import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
import org.elasticsearch.index.mapper.internal.TTLFieldMapper;
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.mapper.internal.VersionFieldMapper;
import org.elasticsearch.index.mapper.object.ObjectMapper; import org.elasticsearch.index.mapper.object.ObjectMapper;
import org.elasticsearch.index.mapper.object.RootObjectMapper; import org.elasticsearch.index.mapper.object.RootObjectMapper;
import org.elasticsearch.script.ExecutableScript; import org.elasticsearch.script.ExecutableScript;
@ -63,14 +51,10 @@ import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptContext; import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService; import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptService.ScriptType; import org.elasticsearch.script.ScriptService.ScriptType;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CopyOnWriteArrayList;
/** /**
@ -352,15 +336,29 @@ public class DocumentMapper implements ToXContent {
/** /**
* Returns the best nested {@link ObjectMapper} instance that is in the scope of the specified nested docId. * Returns the best nested {@link ObjectMapper} instance that is in the scope of the specified nested docId.
*/ */
public ObjectMapper findNestedObjectMapper(int nestedDocId, BitsetFilterCache cache, LeafReaderContext context) throws IOException { public ObjectMapper findNestedObjectMapper(int nestedDocId, SearchContext sc, LeafReaderContext context) throws IOException {
ObjectMapper nestedObjectMapper = null; ObjectMapper nestedObjectMapper = null;
for (ObjectMapper objectMapper : objectMappers().values()) { for (ObjectMapper objectMapper : objectMappers().values()) {
if (!objectMapper.nested().isNested()) { if (!objectMapper.nested().isNested()) {
continue; continue;
} }
BitDocIdSet nestedTypeBitSet = cache.getBitDocIdSetFilter(objectMapper.nestedTypeFilter()).getDocIdSet(context); Filter filter = sc.filterCache().cache(objectMapper.nestedTypeFilter(), null, sc.queryParserService().autoFilterCachePolicy());
if (nestedTypeBitSet != null && nestedTypeBitSet.bits().get(nestedDocId)) { if (filter == null) {
continue;
}
// We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and
// therefore is guaranteed to be a live doc.
DocIdSet nestedTypeSet = filter.getDocIdSet(context, null);
if (nestedTypeSet == null) {
continue;
}
DocIdSetIterator iterator = nestedTypeSet.iterator();
if (iterator == null) {
continue;
}
if (iterator.advance(nestedDocId) == nestedDocId) {
if (nestedObjectMapper == null) { if (nestedObjectMapper == null) {
nestedObjectMapper = objectMapper; nestedObjectMapper = objectMapper;
} else { } else {

View File

@ -21,9 +21,9 @@ package org.elasticsearch.search.fetch;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter; import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.BitDocIdSet;
@ -67,12 +67,7 @@ import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SourceLookup; import org.elasticsearch.search.lookup.SourceLookup;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static com.google.common.collect.Lists.newArrayList; import static com.google.common.collect.Lists.newArrayList;
import static org.elasticsearch.common.xcontent.XContentFactory.contentBuilder; import static org.elasticsearch.common.xcontent.XContentFactory.contentBuilder;
@ -288,7 +283,7 @@ public class FetchPhase implements SearchPhase {
SourceLookup sourceLookup = context.lookup().source(); SourceLookup sourceLookup = context.lookup().source();
sourceLookup.setSegmentAndDocument(subReaderContext, nestedSubDocId); sourceLookup.setSegmentAndDocument(subReaderContext, nestedSubDocId);
ObjectMapper nestedObjectMapper = documentMapper.findNestedObjectMapper(nestedSubDocId, context.bitsetFilterCache(), subReaderContext); ObjectMapper nestedObjectMapper = documentMapper.findNestedObjectMapper(nestedSubDocId, context, subReaderContext);
assert nestedObjectMapper != null; assert nestedObjectMapper != null;
InternalSearchHit.InternalNestedIdentity nestedIdentity = getInternalNestedIdentity(context, nestedSubDocId, subReaderContext, documentMapper, nestedObjectMapper); InternalSearchHit.InternalNestedIdentity nestedIdentity = getInternalNestedIdentity(context, nestedSubDocId, subReaderContext, documentMapper, nestedObjectMapper);
@ -375,38 +370,56 @@ public class FetchPhase implements SearchPhase {
private InternalSearchHit.InternalNestedIdentity getInternalNestedIdentity(SearchContext context, int nestedSubDocId, LeafReaderContext subReaderContext, DocumentMapper documentMapper, ObjectMapper nestedObjectMapper) throws IOException { private InternalSearchHit.InternalNestedIdentity getInternalNestedIdentity(SearchContext context, int nestedSubDocId, LeafReaderContext subReaderContext, DocumentMapper documentMapper, ObjectMapper nestedObjectMapper) throws IOException {
int currentParent = nestedSubDocId; int currentParent = nestedSubDocId;
ObjectMapper nestedParentObjectMapper; ObjectMapper nestedParentObjectMapper;
StringBuilder field = new StringBuilder();
ObjectMapper current = nestedObjectMapper;
InternalSearchHit.InternalNestedIdentity nestedIdentity = null; InternalSearchHit.InternalNestedIdentity nestedIdentity = null;
do { do {
String field;
Filter parentFilter; Filter parentFilter;
nestedParentObjectMapper = documentMapper.findParentObjectMapper(nestedObjectMapper); nestedParentObjectMapper = documentMapper.findParentObjectMapper(current);
if (field.length() != 0) {
field.insert(0, '.');
}
field.insert(0, current.name());
if (nestedParentObjectMapper != null) { if (nestedParentObjectMapper != null) {
field = nestedObjectMapper.name(); if (nestedParentObjectMapper.nested().isNested() == false) {
if (!nestedParentObjectMapper.nested().isNested()) { current = nestedParentObjectMapper;
nestedObjectMapper = nestedParentObjectMapper;
// all right, the parent is a normal object field, so this is the best identity we can give for that:
nestedIdentity = new InternalSearchHit.InternalNestedIdentity(field, 0, nestedIdentity);
continue; continue;
} }
parentFilter = nestedParentObjectMapper.nestedTypeFilter(); parentFilter = nestedParentObjectMapper.nestedTypeFilter();
} else { } else {
field = nestedObjectMapper.fullPath();
parentFilter = Queries.newNonNestedFilter(); parentFilter = Queries.newNonNestedFilter();
} }
Filter childFilter = context.filterCache().cache(nestedObjectMapper.nestedTypeFilter(), null, context.queryParserService().autoFilterCachePolicy());
if (childFilter == null) {
current = nestedParentObjectMapper;
continue;
}
// We can pass down 'null' as acceptedDocs, because we're fetching a docId that already matched in the query phase.
DocIdSet childDocSet = childFilter.getDocIdSet(subReaderContext, null);
if (childDocSet == null) {
current = nestedParentObjectMapper;
continue;
}
DocIdSetIterator childIter = childDocSet.iterator();
if (childIter == null) {
current = nestedParentObjectMapper;
continue;
}
BitDocIdSet parentBitSet = context.bitsetFilterCache().getBitDocIdSetFilter(parentFilter).getDocIdSet(subReaderContext); BitDocIdSet parentBitSet = context.bitsetFilterCache().getBitDocIdSetFilter(parentFilter).getDocIdSet(subReaderContext);
BitSet parentBits = parentBitSet.bits(); BitSet parentBits = parentBitSet.bits();
int offset = 0; int offset = 0;
BitDocIdSet nestedDocsBitSet = context.bitsetFilterCache().getBitDocIdSetFilter(nestedObjectMapper.nestedTypeFilter()).getDocIdSet(subReaderContext);
BitSet nestedBits = nestedDocsBitSet.bits();
int nextParent = parentBits.nextSetBit(currentParent); int nextParent = parentBits.nextSetBit(currentParent);
for (int docId = nestedBits.nextSetBit(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS; docId = nestedBits.nextSetBit(docId + 1)) { for (int docId = childIter.advance(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS; docId = childIter.nextDoc()) {
offset++; offset++;
} }
currentParent = nextParent; currentParent = nextParent;
nestedObjectMapper = nestedParentObjectMapper; current = nestedObjectMapper = nestedParentObjectMapper;
nestedIdentity = new InternalSearchHit.InternalNestedIdentity(field, offset, nestedIdentity); nestedIdentity = new InternalSearchHit.InternalNestedIdentity(field.toString(), offset, nestedIdentity);
} while (nestedParentObjectMapper != null); field = new StringBuilder();
} while (current != null);
return nestedIdentity; return nestedIdentity;
} }

View File

@ -867,7 +867,12 @@ public class InnerHitsTests extends ElasticsearchIntegrationTest {
List<IndexRequestBuilder> requests = new ArrayList<>(); List<IndexRequestBuilder> requests = new ArrayList<>();
requests.add(client().prepareIndex("articles", "article", "1").setSource(jsonBuilder().startObject() requests.add(client().prepareIndex("articles", "article", "1").setSource(jsonBuilder().startObject()
.field("title", "quick brown fox") .field("title", "quick brown fox")
.startObject("comments").startObject("messages").field("message", "fox eat quick").endObject().endObject() .startObject("comments")
.startArray("messages")
.startObject().field("message", "fox eat quick").endObject()
.startObject().field("message", "bear eat quick").endObject()
.endArray()
.endObject()
.endObject())); .endObject()));
indexRandom(true, requests); indexRandom(true, requests);
@ -879,11 +884,40 @@ public class InnerHitsTests extends ElasticsearchIntegrationTest {
assertThat(response.getHits().getAt(0).id(), equalTo("1")); assertThat(response.getHits().getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getTotalHits(), equalTo(1l)); assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getTotalHits(), equalTo(1l));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).id(), equalTo("1")); assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getField().string(), equalTo("comments")); assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getField().string(), equalTo("comments.messages"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getOffset(), equalTo(0)); assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getOffset(), equalTo(0));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("messages")); assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild(), nullValue());
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild().getChild(), nullValue()); response = client().prepareSearch("articles")
.setQuery(nestedQuery("comments.messages", matchQuery("comments.messages.message", "bear")).innerHit(new QueryInnerHitBuilder()))
.get();
assertNoFailures(response);
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getTotalHits(), equalTo(1l));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getField().string(), equalTo("comments.messages"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getOffset(), equalTo(1));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild(), nullValue());
// index the message in an object form instead of an array
requests = new ArrayList<>();
requests.add(client().prepareIndex("articles", "article", "1").setSource(jsonBuilder().startObject()
.field("title", "quick brown fox")
.startObject("comments").startObject("messages").field("message", "fox eat quick").endObject().endObject()
.endObject()));
indexRandom(true, requests);
response = client().prepareSearch("articles")
.setQuery(nestedQuery("comments.messages", matchQuery("comments.messages.message", "fox")).innerHit(new QueryInnerHitBuilder()))
.get();
assertNoFailures(response);
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getTotalHits(), equalTo(1l));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getField().string(), equalTo("comments.messages"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getOffset(), equalTo(0));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild(), nullValue());
} }
} }