From 292e53fe77cae17362029d79e6afb937e75eec89 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 2 Dec 2013 00:16:06 +0100 Subject: [PATCH] The short circuit mechanism always needs to be wrapped with the live docs. In certain scenarios the live docs isn't passed down with acceptedDocs. For example when a has_child is wrapped in a filtered query as query and the wrapped filter is cached. The short circuit mechanism in that case counts down based on deleted docs, which then yields lower results than is expected. Relates to #4306 --- .../child/ChildrenConstantScoreQuery.java | 21 ++++++++-------- .../index/search/child/ChildrenQuery.java | 23 +++++++---------- .../ChildrenConstantScoreQueryTests.java | 25 ++++++++++++++++--- .../search/child/ChildrenQueryTests.java | 2 +- 4 files changed, 42 insertions(+), 29 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQuery.java b/src/main/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQuery.java index ca422ec94ce..ccd6c7f0c4f 100644 --- a/src/main/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQuery.java +++ b/src/main/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQuery.java @@ -173,16 +173,17 @@ public class ChildrenConstantScoreQuery extends Query { } DocIdSet parentDocIdSet = this.parentFilter.getDocIdSet(context, acceptDocs); - if (DocIdSets.isEmpty(parentDocIdSet)) { - return null; - } - - IdReaderTypeCache idReaderTypeCache = searchContext.idCache().reader(context.reader()).type(parentType); - if (idReaderTypeCache != null) { - DocIdSetIterator innerIterator = parentDocIdSet.iterator(); - if (innerIterator != null) { - ParentDocIdIterator parentDocIdIterator = new ParentDocIdIterator(innerIterator, collectedUids.v(), idReaderTypeCache); - return ConstantScorer.create(parentDocIdIterator, this, queryWeight); + if (!DocIdSets.isEmpty(parentDocIdSet)) { + IdReaderTypeCache idReaderTypeCache = searchContext.idCache().reader(context.reader()).type(parentType); + // We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining" + // count down (short circuit) logic will then work as expected. + parentDocIdSet = BitsFilteredDocIdSet.wrap(parentDocIdSet, context.reader().getLiveDocs()); + if (idReaderTypeCache != null) { + DocIdSetIterator innerIterator = parentDocIdSet.iterator(); + if (innerIterator != null) { + ParentDocIdIterator parentDocIdIterator = new ParentDocIdIterator(innerIterator, collectedUids.v(), idReaderTypeCache); + return ConstantScorer.create(parentDocIdIterator, this, queryWeight); + } } } return null; diff --git a/src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java b/src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java index ccda68d8132..70356cc1eea 100644 --- a/src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java +++ b/src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java @@ -69,7 +69,7 @@ public class ChildrenQuery extends Query { public ChildrenQuery(String parentType, String childType, Filter parentFilter, Query childQuery, ScoreType scoreType, int shortCircuitParentDocSet) { this.parentType = parentType; this.childType = childType; - this.parentFilter = new ApplyAcceptedDocsFilter(parentFilter); + this.parentFilter = parentFilter; this.originalChildQuery = childQuery; this.scoreType = scoreType; this.shortCircuitParentDocSet = shortCircuitParentDocSet; @@ -190,7 +190,7 @@ public class ChildrenQuery extends Query { private ParentWeight(Weight childWeight, Filter parentFilter, SearchContext searchContext, int remaining, Recycler.V> uidToScore, Recycler.V> uidToCount) { this.childWeight = childWeight; - this.parentFilter = parentFilter; + this.parentFilter = new ApplyAcceptedDocsFilter(parentFilter); this.searchContext = searchContext; this.remaining = remaining; this.uidToScore = uidToScore; @@ -226,7 +226,9 @@ public class ChildrenQuery extends Query { } IdReaderTypeCache idTypeCache = searchContext.idCache().reader(context.reader()).type(parentType); - DocIdSetIterator parentsIterator = parentsSet.iterator(); + // We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining" + // count down (short circuit) logic will then work as expected. + DocIdSetIterator parentsIterator = BitsFilteredDocIdSet.wrap(parentsSet, context.reader().getLiveDocs()).iterator(); switch (scoreType) { case AVG: return new AvgParentScorer(this, idTypeCache, uidToScore.v(), uidToCount.v(), parentsIterator); @@ -247,7 +249,6 @@ public class ChildrenQuery extends Query { final IdReaderTypeCache idTypeCache; final DocIdSetIterator parentsIterator; - int remaining; int currentDocId = -1; float currentScore; @@ -256,7 +257,6 @@ public class ChildrenQuery extends Query { this.idTypeCache = idTypeCache; this.parentsIterator = parentsIterator; this.uidToScore = uidToScore; - this.remaining = uidToScore.size(); } @Override @@ -279,8 +279,7 @@ public class ChildrenQuery extends Query { @Override public int nextDoc() throws IOException { if (remaining == 0) { - currentDocId = NO_MORE_DOCS; - return NO_MORE_DOCS; + return currentDocId = NO_MORE_DOCS; } while (true) { @@ -303,8 +302,7 @@ public class ChildrenQuery extends Query { @Override public int advance(int target) throws IOException { if (remaining == 0) { - currentDocId = NO_MORE_DOCS; - return NO_MORE_DOCS; + return currentDocId = NO_MORE_DOCS; } currentDocId = parentsIterator.advance(target); @@ -341,8 +339,7 @@ public class ChildrenQuery extends Query { @Override public int nextDoc() throws IOException { if (remaining == 0) { - currentDocId = NO_MORE_DOCS; - return NO_MORE_DOCS; + return currentDocId = NO_MORE_DOCS; } while (true) { @@ -365,8 +362,7 @@ public class ChildrenQuery extends Query { @Override public int advance(int target) throws IOException { if (remaining == 0) { - currentDocId = NO_MORE_DOCS; - return NO_MORE_DOCS; + return currentDocId = NO_MORE_DOCS; } currentDocId = parentsIterator.advance(target); @@ -425,7 +421,6 @@ public class ChildrenQuery extends Query { break; case AVG: assert false : "AVG has its own collector"; - default: assert false : "Are we missing a score type here? -- " + scoreType; break; diff --git a/src/test/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQueryTests.java b/src/test/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQueryTests.java index b88cf38a679..2082956869a 100644 --- a/src/test/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQueryTests.java +++ b/src/test/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQueryTests.java @@ -31,7 +31,9 @@ import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; import org.elasticsearch.cache.recycler.CacheRecycler; import org.elasticsearch.common.compress.CompressedString; +import org.elasticsearch.common.lucene.search.NotFilter; import org.elasticsearch.common.lucene.search.XConstantScoreQuery; +import org.elasticsearch.common.lucene.search.XFilteredQuery; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; @@ -129,20 +131,25 @@ public class ChildrenConstantScoreQueryTests extends ElasticsearchLuceneTestCase childValues[i] = Integer.toString(i); } - IntOpenHashSet initialDeletedParentIds = new IntOpenHashSet(); + IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet(); int childDocId = 0; int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000); ObjectObjectOpenHashMap> childValueToParentIds = new ObjectObjectOpenHashMap>(); for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) { boolean markParentAsDeleted = rarely(); + boolean filterMe = rarely(); String parent = Integer.toString(parentDocId); Document document = new Document(); document.add(new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); if (markParentAsDeleted) { - initialDeletedParentIds.add(parentDocId); + filteredOrDeletedDocs.add(parentDocId); document.add(new StringField("delete", "me", Field.Store.NO)); } + if (filterMe) { + filteredOrDeletedDocs.add(parentDocId); + document.add(new StringField("filter", "me", Field.Store.NO)); + } indexWriter.addDocument(document); int numChildDocs; @@ -172,7 +179,7 @@ public class ChildrenConstantScoreQueryTests extends ElasticsearchLuceneTestCase } else { childValueToParentIds.put(childValue, parentIds = new TreeSet()); } - if (!markParentAsDeleted) { + if (!markParentAsDeleted && !filterMe) { parentIds.add(parent); } } @@ -191,6 +198,7 @@ public class ChildrenConstantScoreQueryTests extends ElasticsearchLuceneTestCase ((TestSearchContext) SearchContext.current()).setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher)); Filter rawParentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent")); + Filter rawFilterMe = new NotFilter(new TermFilter(new Term("filter", "me"))); int max = numUniqueChildValues / 4; for (int i = 0; i < max; i++) { // Randomly pick a cached version: there is specific logic inside ChildrenQuery that deals with the fact @@ -202,6 +210,14 @@ public class ChildrenConstantScoreQueryTests extends ElasticsearchLuceneTestCase parentFilter = rawParentFilter; } + // Using this in FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer not get live docs as acceptedDocs + Filter filterMe; + if (random().nextBoolean()) { + filterMe = SearchContext.current().filterCache().cache(rawFilterMe); + } else { + filterMe = rawFilterMe; + } + // Simulate a parent update if (random().nextBoolean()) { int numberOfUpdates = 1 + random().nextInt(TEST_NIGHTLY ? 25 : 5); @@ -209,7 +225,7 @@ public class ChildrenConstantScoreQueryTests extends ElasticsearchLuceneTestCase int parentId; do { parentId = random().nextInt(numParentDocs); - } while (initialDeletedParentIds.contains(parentId)); + } while (filteredOrDeletedDocs.contains(parentId)); String parentUid = Uid.createUid("parent", Integer.toString(parentId)); indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid)); @@ -244,6 +260,7 @@ public class ChildrenConstantScoreQueryTests extends ElasticsearchLuceneTestCase ) ); } + query = new XFilteredQuery(query, filterMe); BitSetCollector collector = new BitSetCollector(indexReader.maxDoc()); searcher.search(query, collector); FixedBitSet actualResult = collector.getResult(); diff --git a/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java b/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java index 86e797b9a2a..e120b7dff7c 100644 --- a/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java +++ b/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java @@ -158,7 +158,7 @@ public class ChildrenQueryTests extends ElasticsearchLuceneTestCase { parentFilter = rawParentFilter; } - // Using this in FQ, will invoke / test the Scorer#advance(..) + // Using this in FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer not get live docs as acceptedDocs Filter filterMe; if (random().nextBoolean()) { filterMe = SearchContext.current().filterCache().cache(rawFilterMe);