diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c05de03c483..fb1edea43b6 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -155,6 +155,9 @@ Bug fixes as workaround); fix output of tests to use same format. (Uwe Schindler, Ramkumar Aiyengar) +* LUCENE-6593: Fixed ToChildBlockJoinQuery's scorer to not refuse to advance + to a document that belongs to the parent space. (Adrien Grand) + Changes in Runtime Behavior * LUCENE-6501: The subreader structure in ParallelCompositeReader diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java index 2eaf1bf2664..1b3505f5c36 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java @@ -159,7 +159,7 @@ public class ToChildBlockJoinQuery extends Query { private int parentFreq = 1; private int childDoc = -1; - private int parentDoc; + private int parentDoc = 0; public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, BitSet parentBits, boolean doScores, Bits acceptDocs) { super(weight); @@ -268,40 +268,41 @@ public class ToChildBlockJoinQuery extends Query { @Override public int advance(int childTarget) throws IOException { - - //System.out.println("Q.advance childTarget=" + childTarget); - if (childTarget == NO_MORE_DOCS) { - //System.out.println(" END"); - return childDoc = parentDoc = NO_MORE_DOCS; - } - - if (parentBits.get(childTarget)) { - throw new IllegalStateException(ILLEGAL_ADVANCE_ON_PARENT + childTarget); - } - - assert childDoc == -1 || childTarget != parentDoc: "childTarget=" + childTarget; - if (childDoc == -1 || childTarget > parentDoc) { - // Advance to new parent: - parentDoc = parentScorer.advance(childTarget); + if (childTarget >= parentDoc) { + if (childTarget == NO_MORE_DOCS) { + return childDoc = parentDoc = NO_MORE_DOCS; + } + parentDoc = parentScorer.advance(childTarget + 1); validateParentDoc(); - //System.out.println(" advance to parentDoc=" + parentDoc); - assert parentDoc > childTarget; + if (parentDoc == NO_MORE_DOCS) { - //System.out.println(" END"); return childDoc = NO_MORE_DOCS; } + + // scan to the first parent that has children + while (true) { + final int firstChild = parentBits.prevSetBit(parentDoc-1) + 1; + if (firstChild != parentDoc) { + // this parent has children + childTarget = Math.max(childTarget, firstChild); + break; + } + // parent with no children, move to the next one + parentDoc = parentScorer.nextDoc(); + validateParentDoc(); + if (parentDoc == NO_MORE_DOCS) { + return childDoc = NO_MORE_DOCS; + } + } + if (doScores) { parentScore = parentScorer.score(); parentFreq = parentScorer.freq(); } - final int firstChild = parentBits.prevSetBit(parentDoc-1); - //System.out.println(" firstChild=" + firstChild); - childTarget = Math.max(childTarget, firstChild); } assert childTarget < parentDoc; - - // Advance within children of current parent: + assert !parentBits.get(childTarget); childDoc = childTarget; //System.out.println(" " + childDoc); if (acceptDocs != null && !acceptDocs.get(childDoc)) { diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java index b37554a0734..dcee28f3d49 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java @@ -17,13 +17,23 @@ package org.apache.lucene.search.join; * limitations under the License. */ +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Random; + import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.IntField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -37,8 +47,27 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.Term; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.NumericRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.search.QueryWrapperFilter; +import org.apache.lucene.search.RandomApproximationQuery; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.Weight; import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.store.Directory; @@ -50,13 +79,6 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.TestUtil; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Locale; - public class TestBlockJoin extends LuceneTestCase { // One resume... @@ -1551,4 +1573,47 @@ public class TestBlockJoin extends LuceneTestCase { r.close(); dir.close(); } + + public void testIntersectionWithRandomApproximation() throws IOException { + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + final int numBlocks = atLeast(100); + for (int i = 0; i < numBlocks; ++i) { + List docs = new ArrayList<>(); + final int numChildren = random().nextInt(3); + for (int j = 0; j < numChildren; ++j) { + Document child = new Document(); + child.add(new StringField("foo_child", random().nextBoolean() ? "bar" : "baz", Store.NO)); + docs.add(child); + } + Document parent = new Document(); + parent.add(new StringField("parent", "true", Store.NO)); + parent.add(new StringField("foo_parent", random().nextBoolean() ? "bar" : "baz", Store.NO)); + docs.add(parent); + w.addDocuments(docs); + } + final IndexReader reader = w.getReader(); + final IndexSearcher searcher = newSearcher(reader); + searcher.setQueryCache(null); // to have real advance() calls + + final BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true")))); + final Query toChild = new ToChildBlockJoinQuery(new TermQuery(new Term("foo_parent", "bar")), parentsFilter); + final Query childQuery = new TermQuery(new Term("foo_child", "baz")); + + BooleanQuery bq1 = new BooleanQuery.Builder() + .add(toChild, Occur.MUST) + .add(childQuery, Occur.MUST) + .build(); + BooleanQuery bq2 = new BooleanQuery.Builder() + .add(toChild, Occur.MUST) + .add(new RandomApproximationQuery(childQuery, random()), Occur.MUST) + .build(); + + assertEquals(searcher.count(bq1), searcher.count(bq2)); + + searcher.getIndexReader().close(); + w.close(); + dir.close(); + } }