From e327efb676e04f72c39e902f08c0d11497b4c57d Mon Sep 17 00:00:00 2001
From: Martijn van Groningen
Date: Sun, 29 Jan 2017 21:36:48 +0100
Subject: [PATCH] LUCENE-6959: Removed ToParentBlockJoinCollector in favour of
ParentChildrenBlockJoinQuery, which can return the matching child documents
per parent document. This query should be executed for each matching parent
document after the main query has been executed.
lucene/CHANGES.txt | 6 +
.../join/ | 199 ++++++
.../join/ | 507 ---------------
.../join/ | 73 ---
.../search/join/ | 108 +---
.../lucene/search/join/ | 27 +-
.../lucene/search/join/ | 582 +++---------------
.../lucene/search/join/ | 110 ++--
.../ | 104 ++++
.../search/join/ | 14 -
10 files changed, 488 insertions(+), 1242 deletions(-)
create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/
delete mode 100644 lucene/join/src/java/org/apache/lucene/search/join/
delete mode 100644 lucene/join/src/java/org/apache/lucene/search/join/
create mode 100644 lucene/join/src/test/org/apache/lucene/search/join/
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 98ec1fa8a1b..e59744a6b9a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -82,6 +82,12 @@ API Changes
IndexWriter#updateDocValues() calls, to prevent calling with non-existent
docValues fields (Ishan Chattopadhyaya, Adrien Grand, Mike McCandless)
+* LUCENE-6959: Removed ToParentBlockJoinCollector in favour of
+ ParentChildrenBlockJoinQuery, which can return the matching child documents per
+ parent document. This query should be executed for each matching parent document
+ after the main query has been executed. (Adrien Grand, Martijn van Groningen,
+ Mike McCandless)
New Features
* LUCENE-7623: Add FunctionScoreQuery and FunctionMatchQuery (Alan Woodward,
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ b/lucene/join/src/java/org/apache/lucene/search/join/
new file mode 100644
index 00000000000..a739294d0d9
--- /dev/null
+++ b/lucene/join/src/java/org/apache/lucene/search/join/
@@ -0,0 +1,199 @@
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Set;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.BitSet;
+ * A query that returns all the matching child documents for a specific parent document
+ * indexed together in the same block. The provided child query determines which of the
+ * parent's child docs are returned.
+ *
+ * @lucene.experimental
+ */
+public class ParentChildrenBlockJoinQuery extends Query {
+ private final BitSetProducer parentFilter;
+ private final Query childQuery;
+ private final int parentDocId;
+ /**
+ * Creates a ParentChildrenBlockJoinQuery
+ *
+ * @param parentFilter A filter identifying parent documents.
+ * @param childQuery A child query that determines which child docs are matching
+ * @param parentDocId The top-level doc id of the parent whose child documents should be returned
+ */
+ public ParentChildrenBlockJoinQuery(BitSetProducer parentFilter, Query childQuery, int parentDocId) {
+ this.parentFilter = parentFilter;
+ this.childQuery = childQuery;
+ this.parentDocId = parentDocId;
+ }
+ @Override
+ public boolean equals(Object obj) {
+ if (sameClassAs(obj) == false) {
+ return false;
+ }
+ ParentChildrenBlockJoinQuery other = (ParentChildrenBlockJoinQuery) obj;
+ return parentFilter.equals(other.parentFilter)
+ && childQuery.equals(other.childQuery)
+ && parentDocId == other.parentDocId;
+ }
+ @Override
+ public int hashCode() {
+ int hash = classHash();
+ hash = 31 * hash + parentFilter.hashCode();
+ hash = 31 * hash + childQuery.hashCode();
+ hash = 31 * hash + parentDocId;
+ return hash;
+ }
+ @Override
+ public String toString(String field) {
+ return "ParentChildrenBlockJoinQuery (" + childQuery + ")";
+ }
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ final Query childRewrite = childQuery.rewrite(reader);
+ if (childRewrite != childQuery) {
+ return new ParentChildrenBlockJoinQuery(parentFilter, childRewrite, parentDocId);
+ } else {
+ return super.rewrite(reader);
+ }
+ }
+ @Override
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+ final Weight childWeight = childQuery.createWeight(searcher, needsScores, boost);
+ final int readerIndex = ReaderUtil.subIndex(parentDocId, searcher.getIndexReader().leaves());
+ return new Weight(this) {
+ @Override
+ public void extractTerms(Set terms) {
+ childWeight.extractTerms(terms);
+ }
+ @Override
+ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+ return Explanation.noMatch("Not implemented, use ToParentBlockJoinQuery explain why a document matched");
+ }
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ // Child docs only reside in the same segment as their parent, so no need to evaluate the other segments
+ if (context.ord != readerIndex) {
+ return null;
+ }
+ final int localParentDocId = parentDocId - context.docBase;
+ // If localParentDocId == 0 then the parent doc has no child docs: child docs are stored before
+ // their parent doc, and no doc precedes doc 0 in this segment.
+ if (localParentDocId == 0) {
+ return null;
+ }
+ final BitSet parents = parentFilter.getBitSet(context);
+ final int firstChildDocId = parents.prevSetBit(localParentDocId - 1) + 1;
+ // The doc right before this parent is itself a parent, so there are no child docs and we can exit early:
+ if (firstChildDocId == localParentDocId) {
+ return null;
+ }
+ final Scorer childrenScorer = childWeight.scorer(context);
+ if (childrenScorer == null) {
+ return null;
+ }
+ DocIdSetIterator childrenIterator = childrenScorer.iterator();
+ final DocIdSetIterator it = new DocIdSetIterator() {
+ int doc = -1;
+ @Override
+ public int docID() {
+ return doc;
+ }
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
+ @Override
+ public int advance(int target) throws IOException {
+ target = Math.max(firstChildDocId, target);
+ if (target >= localParentDocId) {
+ // We're outside the child nested scope, so it is done
+ return doc = NO_MORE_DOCS;
+ } else {
+ int advanced = childrenIterator.advance(target);
+ if (advanced >= localParentDocId) {
+ // We're outside the child nested scope, so it is done
+ return doc = NO_MORE_DOCS;
+ } else {
+ return doc = advanced;
+ }
+ }
+ }
+ @Override
+ public long cost() {
+ return Math.min(childrenIterator.cost(), localParentDocId - firstChildDocId);
+ }
+ };
+ return new Scorer(this) {
+ @Override
+ public int docID() {
+ return it.docID();
+ }
+ @Override
+ public float score() throws IOException {
+ return childrenScorer.score();
+ }
+ @Override
+ public int freq() throws IOException {
+ return childrenScorer.freq();
+ }
+ @Override
+ public DocIdSetIterator iterator() {
+ return it;
+ }
+ };
+ }
+ };
+ }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ b/lucene/join/src/java/org/apache/lucene/search/join/
deleted file mode 100644
index f81b943c0bc..00000000000
--- a/lucene/join/src/java/org/apache/lucene/search/join/
+++ /dev/null
@@ -1,507 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.Map;
-import java.util.Queue;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.util.ArrayUtil;
-/** Collects parent document hits for a Query containing one more more
- * BlockJoinQuery clauses, sorted by the
- * specified parent Sort. Note that this cannot perform
- * arbitrary joins; rather, it requires that all joined
- * documents are indexed as a doc block (using {@link
- * IndexWriter#addDocuments} or {@link
- * IndexWriter#updateDocuments}). Ie, the join is computed
- * at index time.
- *
- * This collector MUST be used with {@link ToParentBlockJoinIndexSearcher},
- * in order to work correctly.
- *
- *
The parent Sort must only use
- * fields from the parent documents; sorting by field in
- * the child documents is not supported.
- *
- * You should only use this
- * collector if one or more of the clauses in the query is
- * a {@link ToParentBlockJoinQuery}. This collector will find those query
- * clauses and record the matching child documents for the
- * top scoring parent documents.
- *
- * Multiple joins (star join) and nested joins and a mix
- * of the two are allowed, as long as in all cases the
- * documents corresponding to a single row of each joined
- * parent table were indexed as a doc block.
- *
- * For the simple star join you can retrieve the
- * {@link TopGroups} instance containing each {@link ToParentBlockJoinQuery}'s
- * matching child documents for the top parent groups,
- * using {@link #getTopGroups}. Ie,
- * a single query, which will contain two or more
- * {@link ToParentBlockJoinQuery}'s as clauses representing the star join,
- * can then retrieve two or more {@link TopGroups} instances.
- *
- * For nested joins, the query will run correctly (ie,
- * match the right parent and child documents), however,
- * because TopGroups is currently unable to support nesting
- * (each group is not able to hold another TopGroups), you
- * are only able to retrieve the TopGroups of the first
- * join. The TopGroups of the nested joins will not be
- * correct.
- *
- * See {@link} for a code
- * sample.
- *
- * @lucene.experimental
- */
-public class ToParentBlockJoinCollector implements Collector {
- private final Sort sort;
- // Maps each BlockJoinQuery instance to its "slot" in
- // joinScorers and in OneGroup's cached doc/scores/count:
- private final Map joinQueryID = new HashMap<>();
- private final int numParentHits;
- private final FieldValueHitQueue queue;
- private final FieldComparator>[] comparators;
- private final boolean trackMaxScore;
- private final boolean trackScores;
- private ToParentBlockJoinQuery.BlockJoinScorer[] joinScorers = new ToParentBlockJoinQuery.BlockJoinScorer[0];
- private boolean queueFull;
- private OneGroup bottom;
- private int totalHitCount;
- private float maxScore = Float.NaN;
- /** Creates a ToParentBlockJoinCollector. The provided sort must
- * not be null. If you pass true trackScores, all
- * ToParentBlockQuery instances must not use
- * ScoreMode.None. */
- public ToParentBlockJoinCollector(Sort sort, int numParentHits, boolean trackScores, boolean trackMaxScore) {
- // TODO: allow null sort to be specialized to relevance
- // only collector
- this.sort = sort;
- this.trackMaxScore = trackMaxScore;
- if (trackMaxScore) {
- maxScore = Float.MIN_VALUE;
- }
- //System.out.println("numParentHits=" + numParentHits);
- this.trackScores = trackScores;
- this.numParentHits = numParentHits;
- queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
- comparators = queue.getComparators();
- }
- private static final class OneGroup extends FieldValueHitQueue.Entry {
- public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
- super(comparatorSlot, parentDoc, parentScore);
- //System.out.println("make OneGroup parentDoc=" + parentDoc);
- docs = new int[numJoins][];
- for(int joinID=0;joinID queue = new LinkedList<>();
- //System.out.println("\nqueue: add top scorer=" + scorer);
- queue.add(scorer);
- while ((scorer = queue.poll()) != null) {
- //System.out.println(" poll: " + scorer + "; " + scorer.getWeight().getQuery());
- if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
- enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
- }
- for (ChildScorer sub : scorer.getChildren()) {
- //System.out.println(" add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
- queue.add(sub.child);
- }
- }
- }
- @Override
- public void collect(int parentDoc) throws IOException {
- //System.out.println("\nC parentDoc=" + parentDoc);
- totalHitCount++;
- float score = Float.NaN;
- if (trackMaxScore) {
- score = scorer.score();
- maxScore = Math.max(maxScore, score);
- }
- // TODO: we could sweep all joinScorers here and
- // aggregate total child hit count, so we can fill this
- // in getTopGroups (we wire it to 0 now)
- if (queueFull) {
- //System.out.println(" queueFull");
- // Fastmatch: return if this hit is not competitive
- int c = 0;
- for (int i = 0; i < comparators.length; ++i) {
- c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
- if (c != 0) {
- break;
- }
- }
- if (c <= 0) { // in case of equality, this hit is not competitive as docs are visited in order
- // Definitely not competitive.
- //System.out.println(" skip");
- return;
- }
- //System.out.println(" competes! doc=" + (docBase + parentDoc));
- // This hit is competitive - replace bottom element in queue & adjustTop
- for (LeafFieldComparator comparator : comparators) {
- comparator.copy(bottom.slot, parentDoc);
- }
- if (!trackMaxScore && trackScores) {
- score = scorer.score();
- }
- bottom.doc = docBase + parentDoc;
- bottom.readerContext = context;
- bottom.score = score;
- copyGroups(bottom);
- bottom = queue.updateTop();
- for (LeafFieldComparator comparator : comparators) {
- comparator.setBottom(bottom.slot);
- }
- } else {
- // Startup transient: queue is not yet full:
- final int comparatorSlot = totalHitCount - 1;
- // Copy hit into queue
- for (LeafFieldComparator comparator : comparators) {
- comparator.copy(comparatorSlot, parentDoc);
- }
- //System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
- if (!trackMaxScore && trackScores) {
- score = scorer.score();
- }
- final OneGroup og = new OneGroup(comparatorSlot, docBase+parentDoc, score, joinScorers.length, trackScores);
- og.readerContext = context;
- copyGroups(og);
- bottom = queue.add(og);
- queueFull = totalHitCount == numParentHits;
- if (queueFull) {
- // End of startup transient: queue just filled up:
- for (LeafFieldComparator comparator : comparators) {
- comparator.setBottom(bottom.slot);
- }
- }
- }
- }
- // Pulls out child doc and scores for all join queries:
- private void copyGroups(OneGroup og) {
- // While rare, it's possible top arrays could be too
- // short if join query had null scorer on first
- // segment(s) but then became non-null on later segments
- final int numSubScorers = joinScorers.length;
- if ( < numSubScorers) {
- // While rare, this could happen if join query had
- // null scorer on first segment(s) but then became
- // non-null on later segments
- = ArrayUtil.grow(, numSubScorers);
- }
- if (og.counts.length < numSubScorers) {
- og.counts = ArrayUtil.grow(og.counts);
- }
- if (trackScores && og.scores.length < numSubScorers) {
- og.scores = ArrayUtil.grow(og.scores, numSubScorers);
- }
- //System.out.println("\ncopyGroups parentDoc=" + og.doc);
- for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
- final ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
- //System.out.println(" scorer=" + joinScorer);
- if (joinScorer != null && docBase + joinScorer.getParentDoc() == og.doc) {
- og.counts[scorerIDX] = joinScorer.getChildCount();
- //System.out.println(" count=" + og.counts[scorerIDX]);
-[scorerIDX] = joinScorer.swapChildDocs([scorerIDX]);
- assert[scorerIDX].length >= og.counts[scorerIDX]: "length=" +[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
- //System.out.println(" len=" +[scorerIDX].length);
- /*
- for(int idx=0;idx= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
- }
- } else {
- og.counts[scorerIDX] = 0;
- }
- }
- }
- };
- }
- private void enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer) {
- scorer.trackPendingChildHits();
- final Integer slot = joinQueryID.get(query);
- if (slot == null) {
- joinQueryID.put(query, joinScorers.length);
- //System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
- final ToParentBlockJoinQuery.BlockJoinScorer[] newArray = new ToParentBlockJoinQuery.BlockJoinScorer[1+joinScorers.length];
- System.arraycopy(joinScorers, 0, newArray, 0, joinScorers.length);
- joinScorers = newArray;
- joinScorers[joinScorers.length-1] = scorer;
- } else {
- joinScorers[slot] = scorer;
- }
- }
- private OneGroup[] sortedGroups;
- private void sortQueue() {
- sortedGroups = new OneGroup[queue.size()];
- for(int downTo=queue.size()-1;downTo>=0;downTo--) {
- sortedGroups[downTo] = queue.pop();
- }
- }
- /** Returns the TopGroups for the specified
- * BlockJoinQuery. The groupValue of each GroupDocs will
- * be the parent docID for that group.
- * The number of documents within each group is calculated as minimum of maxDocsPerGroup
- * and number of matched child documents for that group.
- * Returns null if no groups matched.
- *
- * @param query Search query
- * @param withinGroupSort Sort criteria within groups
- * @param offset Parent docs offset
- * @param maxDocsPerGroup Upper bound of documents per group number
- * @param withinGroupOffset Offset within each group of child docs
- * @param fillSortFields Specifies whether to add sort fields or not
- * @return TopGroups for specified query
- * @throws IOException if there is a low-level I/O error
- */
- public TopGroups getTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset,
- int maxDocsPerGroup, int withinGroupOffset, boolean fillSortFields)
- throws IOException {
- final Integer _slot = joinQueryID.get(query);
- if (_slot == null && totalHitCount == 0) {
- return null;
- }
- if (sortedGroups == null) {
- if (offset >= queue.size()) {
- return null;
- }
- sortQueue();
- } else if (offset > sortedGroups.length) {
- return null;
- }
- return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
- }
- /**
- * Accumulates groups for the BlockJoinQuery specified by its slot.
- *
- * @param slot Search query's slot
- * @param offset Parent docs offset
- * @param maxDocsPerGroup Upper bound of documents per group number
- * @param withinGroupOffset Offset within each group of child docs
- * @param withinGroupSort Sort criteria within groups
- * @param fillSortFields Specifies whether to add sort fields or not
- * @return TopGroups for the query specified by slot
- * @throws IOException if there is a low-level I/O error
- */
- @SuppressWarnings({"unchecked","rawtypes"})
- private TopGroups accumulateGroups(int slot, int offset, int maxDocsPerGroup,
- int withinGroupOffset, Sort withinGroupSort, boolean fillSortFields) throws IOException {
- final GroupDocs[] groups = new GroupDocs[sortedGroups.length - offset];
- final FakeScorer fakeScorer = new FakeScorer();
- int totalGroupedHitCount = 0;
- //System.out.println("slot=" + slot);
- for(int groupIDX=offset;groupIDX= og.counts.length) {
- numChildDocs = 0;
- } else {
- numChildDocs = og.counts[slot];
- }
- // Number of documents in group should be bounded to prevent redundant memory allocation
- final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
- //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
- // At this point we hold all docs w/ in each group,
- // unsorted; we now sort them:
- final TopDocsCollector> collector;
- if (withinGroupSort == null) {
- //System.out.println("sort by score");
- // Sort by score
- if (!trackScores) {
- throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
- }
- collector = TopScoreDocCollector.create(numDocsInGroup);
- } else {
- // Sort by fields
- collector = TopFieldCollector.create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore);
- }
- LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
- leafCollector.setScorer(fakeScorer);
- for(int docIDX=0;docIDX(og.score,
- topDocs.getMaxScore(),
- numChildDocs,
- topDocs.scoreDocs,
- og.doc,
- groupSortValues);
- }
- return new TopGroups<>(new TopGroups<>(sort.getSort(),
- withinGroupSort == null ? null : withinGroupSort.getSort(),
- 0, totalGroupedHitCount, groups, maxScore),
- totalHitCount);
- }
- /** Returns the TopGroups for the specified BlockJoinQuery.
- * The groupValue of each GroupDocs will be the parent docID for that group.
- * The number of documents within each group
- * equals to the total number of matched child documents for that group.
- * Returns null if no groups matched.
- *
- * @param query Search query
- * @param withinGroupSort Sort criteria within groups
- * @param offset Parent docs offset
- * @param withinGroupOffset Offset within each group of child docs
- * @param fillSortFields Specifies whether to add sort fields or not
- * @return TopGroups for specified query
- * @throws IOException if there is a low-level I/O error
- */
- public TopGroups getTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset,
- int withinGroupOffset, boolean fillSortFields)
- throws IOException {
- return getTopGroups(query, withinGroupSort, offset, Integer.MAX_VALUE, withinGroupOffset, fillSortFields);
- }
- /**
- * Returns the highest score across all collected parent hits, as long as
- * trackMaxScores=true
was passed
- * {@link #ToParentBlockJoinCollector(Sort, int, boolean, boolean) on
- * construction}. Else, this returns Float.NaN
- */
- public float getMaxScore() {
- return maxScore;
- }
- @Override
- public boolean needsScores() {
- // needed so that eg. BooleanQuery does not rewrite its MUST clauses to
- // FILTER since the filter scorers are hidden in Scorer.getChildren().
- return true;
- }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ b/lucene/join/src/java/org/apache/lucene/search/join/
deleted file mode 100644
index 84a02a38214..00000000000
--- a/lucene/join/src/java/org/apache/lucene/search/join/
+++ /dev/null
@@ -1,73 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.util.Bits;
- * An {@link IndexSearcher} to use in conjunction with
- * {@link ToParentBlockJoinCollector}.
- */
-public class ToParentBlockJoinIndexSearcher extends IndexSearcher {
- /** Creates a searcher searching the provided index. Search on individual
- * segments will be run in the provided {@link ExecutorService}.
- * @see IndexSearcher#IndexSearcher(IndexReader, ExecutorService) */
- public ToParentBlockJoinIndexSearcher(IndexReader r, ExecutorService executor) {
- super(r, executor);
- }
- /** Creates a searcher searching the provided index.
- * @see IndexSearcher#IndexSearcher(IndexReader) */
- public ToParentBlockJoinIndexSearcher(IndexReader r) {
- super(r);
- }
- @Override
- protected void search(List leaves, Weight weight, Collector collector) throws IOException {
- for (LeafReaderContext ctx : leaves) { // search each subreader
- // we force the use of Scorer (not BulkScorer) to make sure
- // that the scorer passed to LeafCollector.setScorer supports
- // Scorer.getChildren
- Scorer scorer = weight.scorer(ctx);
- if (scorer != null) {
- final LeafCollector leafCollector = collector.getLeafCollector(ctx);
- leafCollector.setScorer(scorer);
- final Bits liveDocs = ctx.reader().getLiveDocs();
- final DocIdSetIterator it = scorer.iterator();
- for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
- if (liveDocs == null || liveDocs.get(doc)) {
- leafCollector.collect(doc);
- }
- }
- }
- }
- }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ b/lucene/join/src/java/org/apache/lucene/search/join/
index 432ebccd58e..6369eea0de9 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/
+++ b/lucene/join/src/java/org/apache/lucene/search/join/
@@ -30,8 +30,6 @@ import;
-import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSet;
@@ -57,20 +55,6 @@ import org.apache.lucene.util.BitSet;
* documents: the wrapped child query must never
* return a parent document.
- * If you'd like to retrieve {@link TopGroups} for the
- * resulting query, use the {@link ToParentBlockJoinCollector}.
- * Note that this is not necessary, ie, if you simply want
- * to collect the parent documents and don't need to see
- * which child documents matched under that parent, then
- * you can use any collector.
- *
- * NOTE: If the overall query contains parent-only
- * matches, for example you OR a parent-only query with a
- * joined child-only query, then the resulting collected documents
- * will be correct, however the {@link TopGroups} you get
- * from {@link ToParentBlockJoinCollector} will not contain every
- * child for parents that had matched.
- *
See {@link} for an
* overview.
@@ -171,39 +155,7 @@ public class ToParentBlockJoinQuery extends Query {
- /**
- * Ascendant for {@link ToParentBlockJoinQuery}'s scorer.
- * @lucene.experimental it might be removed at 6.0
- * */
- public static abstract class ChildrenMatchesScorer extends Scorer{
- /** inherited constructor */
- protected ChildrenMatchesScorer(Weight weight) {
- super(weight);
- }
- /**
- * enables children matches recording
- * */
- public abstract void trackPendingChildHits() ;
- /**
- * reports matched children
- * @return number of recorded matched children docs
- * */
- public abstract int getChildCount() ;
- /**
- * reports matched children
- * @param other array for recording matching children docs of next parent,
- * it might be null (that's slower) or the same array which was returned
- * from the previous call
- * @return array with {@link #getChildCount()} matched children docnums
- * */
- public abstract int[] swapChildDocs(int[] other);
- }
- static class BlockJoinScorer extends ChildrenMatchesScorer {
+ static class BlockJoinScorer extends Scorer {
private final Scorer childScorer;
private final BitSet parentBits;
private final ScoreMode scoreMode;
@@ -212,8 +164,6 @@ public class ToParentBlockJoinQuery extends Query {
private float parentScore;
private int parentFreq;
private int nextChildDoc;
- private int[] pendingChildDocs;
- private float[] pendingChildScores;
private int childDocUpto;
public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
@@ -230,39 +180,6 @@ public class ToParentBlockJoinQuery extends Query {
return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
- @Override
- public int getChildCount() {
- return childDocUpto;
- }
- int getParentDoc() {
- return parentDoc;
- }
- @Override
- public int[] swapChildDocs(int[] other) {
- final int[] ret = pendingChildDocs;
- if (other == null) {
- pendingChildDocs = new int[5];
- } else {
- pendingChildDocs = other;
- }
- return ret;
- }
- float[] swapChildScores(float[] other) {
- if (scoreMode == ScoreMode.None) {
- throw new IllegalStateException("ScoreMode is None; you must pass trackScores=false to ToParentBlockJoinCollector");
- }
- final float[] ret = pendingChildScores;
- if (other == null) {
- pendingChildScores = new float[5];
- } else {
- pendingChildScores = other;
- }
- return ret;
- }
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
@@ -297,22 +214,10 @@ public class ToParentBlockJoinQuery extends Query {
do {
//System.out.println(" c=" + nextChildDoc);
- if (pendingChildDocs != null && pendingChildDocs.length == childDocUpto) {
- pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
- }
- if (pendingChildScores != null && scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
- pendingChildScores = ArrayUtil.grow(pendingChildScores);
- }
- if (pendingChildDocs != null) {
- pendingChildDocs[childDocUpto] = nextChildDoc;
- }
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
final int childFreq = childScorer.freq();
- if (pendingChildScores != null) {
- pendingChildScores[childDocUpto] = childScore;
- }
maxScore = Math.max(childScore, maxScore);
minScore = Math.min(childScore, minScore);
totalScore += childScore;
@@ -440,17 +345,6 @@ public class ToParentBlockJoinQuery extends Query {
"Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
- /**
- * Instructs this scorer to keep track of the child docIds and score ids for retrieval purposes.
- */
- @Override
- public void trackPendingChildHits() {
- pendingChildDocs = new int[5];
- if (scoreMode != ScoreMode.None) {
- pendingChildScores = new float[5];
- }
- }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ b/lucene/join/src/java/org/apache/lucene/search/join/
index 6133f99748f..5b05c1f9c2a 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/
+++ b/lucene/join/src/java/org/apache/lucene/search/join/
@@ -41,14 +41,25 @@
* resulting query can then be used as a clause in any query that
* matches parent.
- * If you only care about the parent documents matching the query, you
- * can use any collector to collect the parent hits, but if you'd also
- * like to see which child documents match for each parent document,
- * use the {@link} to collect the hits. Once the
- * search is done, you retrieve a {@link
- *} instance from the
- * {@link ToParentBlockJoinCollector.getTopGroups()} method.
- *
+ * If you care about which child documents matched for each parent document,
+ * then use the {@link ParentChildrenBlockJoinQuery} query to retrieve, per
+ * matched parent document, the child documents that caused the parent document
+ * to match in the first place. This query should be used after your main query
+ * has been executed. For each hit, execute the
+ * {@link ParentChildrenBlockJoinQuery} query:
+ *
+ * TopDocs results = searcher.search(query, 10);
+ * for (int i = 0; i < results.scoreDocs.length; i++) {
+ * ScoreDoc scoreDoc = results.scoreDocs[i];
+ *
+ * // Run ParentChildrenBlockJoinQuery to figure out the top matching child docs:
+ * ParentChildrenBlockJoinQuery parentChildrenBlockJoinQuery =
+ * new ParentChildrenBlockJoinQuery(parentFilter, childQuery, scoreDoc.doc);
+ * TopDocs topChildResults = searcher.search(parentChildrenBlockJoinQuery, 3);
+ * // Process top child hits...
+ * }
+ *
+ *
* To map/join in the opposite direction, use {@link
*}. This wraps
* any query matching parent documents, creating the joined query
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/ b/lucene/join/src/test/org/apache/lucene/search/join/
index cf21fa40432..f508f84e2fd 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/
+++ b/lucene/join/src/test/org/apache/lucene/search/join/
@@ -22,15 +22,18 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
@@ -47,30 +50,8 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
@@ -157,7 +138,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r =;
- IndexSearcher s = new IndexSearcher(r);
+ IndexSearcher s = newSearcher(r);
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
CheckJoinIndex.check(r, parentsFilter);
@@ -170,18 +151,21 @@ public class TestBlockJoin extends LuceneTestCase {
BooleanQuery.Builder fullQuery = new BooleanQuery.Builder();
fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
fullQuery.add(new BooleanClause(new MatchAllDocsQuery(), Occur.MUST));
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
-, c);
- TopGroups results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
- assertFalse(Float.isNaN(results.maxScore));
- assertEquals(1, results.totalGroupedHitCount);
- assertEquals(1, results.groups.length);
- final GroupDocs group = results.groups[0];
- Document childDoc = s.doc(group.scoreDocs[0].doc);
- assertEquals("java", childDoc.get("skill"));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
- assertEquals("Lisa", parentDoc.get("name"));
+ TopDocs topDocs =, 2);
+ assertEquals(2, topDocs.totalHits);
+ assertEquals(asSet("Lisa", "Frank"),
+ asSet(s.doc(topDocs.scoreDocs[0].doc).get("name"), s.doc(topDocs.scoreDocs[1].doc).get("name")));
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter,, topDocs.scoreDocs[0].doc);
+ TopDocs matchingChildren =, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
+ childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter,, topDocs.scoreDocs[1].doc);
+ matchingChildren =, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
@@ -207,8 +191,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r = w.getReader();
- IndexSearcher s = new ToParentBlockJoinIndexSearcher(r);
- //IndexSearcher s = newSearcher(r, false);
+ IndexSearcher s = newSearcher(r, false);
//IndexSearcher s = new IndexSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
@@ -232,23 +215,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(parentQuery, Occur.SHOULD));
fullQuery.add(new BooleanClause(childJoinQuery, Occur.SHOULD));
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
-, c);
- TopGroups results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
- assertEquals(1, results.totalGroupedHitCount);
- assertEquals(1, results.groups.length);
+ final TopDocs topDocs =, 2);
+ assertEquals(2, topDocs.totalHits);
+ assertEquals(asSet("Lisa", "Frank"),
+ asSet(s.doc(topDocs.scoreDocs[0].doc).get("name"), s.doc(topDocs.scoreDocs[1].doc).get("name")));
- final GroupDocs group = results.groups[0];
- assertEquals(1, group.totalHits);
- assertFalse(Float.isNaN(group.score));
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter,, topDocs.scoreDocs[0].doc);
+ TopDocs matchingChildren =, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
- Document childDoc = s.doc(group.scoreDocs[0].doc);
- //System.out.println(" doc=" + group.scoreDocs[0].doc);
- assertEquals("java", childDoc.get("skill"));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
- assertEquals("Lisa", parentDoc.get("name"));
+ childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter,, topDocs.scoreDocs[1].doc);
+ matchingChildren =, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
@@ -297,30 +278,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(parentQuery, Occur.MUST));
fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
CheckHits.checkHitCollector(random(),, "country", s, new int[] {2});
-, c);
- TopGroups results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
- assertFalse(Float.isNaN(results.maxScore));
+ TopDocs topDocs =, 1);
//assertEquals(1, results.totalHitCount);
- assertEquals(1, results.totalGroupedHitCount);
- assertEquals(1, results.groups.length);
- final GroupDocs group = results.groups[0];
- assertEquals(1, group.totalHits);
- assertFalse(Float.isNaN(group.score));
- Document childDoc = s.doc(group.scoreDocs[0].doc);
- //System.out.println(" doc=" + group.scoreDocs[0].doc);
- assertEquals("java", childDoc.get("skill"));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
+ assertEquals(1, topDocs.totalHits);
+ Document parentDoc = s.doc(topDocs.scoreDocs[0].doc);
assertEquals("Lisa", parentDoc.get("name"));
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter,, topDocs.scoreDocs[0].doc);
+ TopDocs matchingChildren =, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
//System.out.println("TEST: now test up");
@@ -333,7 +305,7 @@ public class TestBlockJoin extends LuceneTestCase {
//System.out.println("FULL: " + fullChildQuery);
TopDocs hits =, 10);
assertEquals(1, hits.totalHits);
- childDoc = s.doc(hits.scoreDocs[0].doc);
+ Document childDoc = s.doc(hits.scoreDocs[0].doc);
//System.out.println("CHILD = " + childDoc + " docID=" + hits.scoreDocs[0].doc);
assertEquals("java", childDoc.get("skill"));
assertEquals(2007, childDoc.getField("year").numericValue());
@@ -347,72 +319,6 @@ public class TestBlockJoin extends LuceneTestCase {
- public void testBugCausedByRewritingTwice() throws IOException {
- final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
- final List docs = new ArrayList<>();
- for (int i=0;i<10;i++) {
- docs.clear();
- docs.add(makeJob("ruby", i));
- docs.add(makeJob("java", 2007));
- docs.add(makeResume("Frank", "United States"));
- w.addDocuments(docs);
- }
- IndexReader r = w.getReader();
- w.close();
- IndexSearcher s = newSearcher(r, false);
- // Hacky: this causes the query to need 2 rewrite
- // iterations:
- BooleanQuery.Builder builder = new BooleanQuery.Builder();
- builder.add(IntPoint.newExactQuery("year", 2007), BooleanClause.Occur.MUST);
- Query qc = new Query() {
- @Override
- public Query rewrite(IndexReader reader) throws IOException {
- return;
- }
- @Override
- public String toString(String field) {
- return "hack!";
- }
- @Override
- public boolean equals(Object o) {
- return o == this;
- }
- @Override
- public int hashCode() {
- return System.identityHashCode(this);
- }
- };
- BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
- CheckJoinIndex.check(r, parentsFilter);
- Query qw1 = qc.rewrite(r);
- Query qw2 = qw1.rewrite(r);
- assertNotSame(qc, qw1);
- assertNotSame(qw1, qw2);
- ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);
-, c);
- TopGroups groups = c.getTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
- for (GroupDocs group : groups.groups) {
- assertEquals(1, group.totalHits);
- }
- r.close();
- dir.close();
- }
protected Query skill(String skill) {
return new TermQuery(new Term("skill", skill));
@@ -612,6 +518,7 @@ public class TestBlockJoin extends LuceneTestCase {
final Directory dir = newDirectory();
final Directory joinDir = newDirectory();
+ final int maxNumChildrenPerParent = 20;
final int numParentDocs = TestUtil.nextInt(random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
//final int numParentDocs = 30;
@@ -669,7 +576,7 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println(" " + sb.toString());
- final int numChildDocs = TestUtil.nextInt(random(), 1, 20);
+ final int numChildDocs = TestUtil.nextInt(random(), 1, maxNumChildrenPerParent);
for(int childDocID=0;childDocID joinResults = new TreeMap<>();
+ for (ScoreDoc parentHit : joinedResults.scoreDocs) {
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter, childQuery, parentHit.doc);
+ TopDocs childTopDocs =, maxNumChildrenPerParent, childSort);
+ final Document parentDoc = joinS.doc(parentHit.doc);
+ joinResults.put(Integer.valueOf(parentDoc.get("parentID")), childTopDocs);
- final ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);
-, c);
final int hitsPerGroup = TestUtil.nextInt(random(), 1, 20);
//final int hitsPerGroup = 100;
- final TopGroups joinResults = c.getTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);
if (VERBOSE) {
- System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.groups.length) + " groups; hitsPerGroup=" + hitsPerGroup);
+ System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.size()) + " groups; hitsPerGroup=" + hitsPerGroup);
if (joinResults != null) {
- final GroupDocs[] groups = joinResults.groups;
- for(int groupIDX=0;groupIDX group = groups[groupIDX];
- if (group.groupSortValues != null) {
- System.out.print(" ");
- for(Object o : group.groupSortValues) {
- if (o instanceof BytesRef) {
- System.out.print(((BytesRef) o).utf8ToString() + " ");
- } else {
- System.out.print(o + " ");
- }
- }
- System.out.println();
- }
- assertNotNull(group.groupValue);
- final Document parentDoc = joinS.doc(group.groupValue);
- System.out.println(" group parentID=" + parentDoc.get("parentID") + " (docID=" + group.groupValue + ")");
- for(int hitIDX=0;hitIDX entry : joinResults.entrySet()) {
+ System.out.println(" group parentID=" + entry.getKey() + " (docID=" + entry.getKey() + ")");
+ for(ScoreDoc childHit : entry.getValue().scoreDocs) {
+ final Document doc = joinS.doc(childHit.doc);
+// System.out.println(" score=" + childHit.score + " childID=" + doc.get("childID") + " (docID=" + childHit.doc + ")");
+ System.out.println(" childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + childHit.doc + ")");
if (results.totalHits == 0) {
- assertNull(joinResults);
+ assertEquals(0, joinResults.size());
} else {
compareHits(r, joinR, results, joinResults);
TopDocs b =, 10);
@@ -1115,43 +1002,24 @@ public class TestBlockJoin extends LuceneTestCase {
- private void compareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups joinResults) throws Exception {
- // results is 'complete'; joinResults is a subset
- int resultUpto = 0;
- int joinGroupUpto = 0;
- final ScoreDoc[] hits = results.scoreDocs;
- final GroupDocs[] groupDocs = joinResults.groups;
- while(joinGroupUpto < groupDocs.length) {
- final GroupDocs group = groupDocs[joinGroupUpto++];
- final ScoreDoc[] groupHits = group.scoreDocs;
- assertNotNull(group.groupValue);
- final Document parentDoc = joinR.document(group.groupValue);
- final String parentID = parentDoc.get("parentID");
- //System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc);
- assertNotNull(parentID);
- assertTrue(groupHits.length > 0);
- for(int hitIDX=0;hitIDX joinResults) throws Exception {
+ int currentParentID = -1;
+ int childHitSlot = 0;
+ TopDocs childHits = new TopDocs(0, new ScoreDoc[0], 0f);
+ for (ScoreDoc controlHit : controlHits.scoreDocs) {
+ Document controlDoc = r.document(controlHit.doc);
+ int parentID = Integer.valueOf(controlDoc.get("parentID"));
+ if (parentID != currentParentID) {
+ assertEquals(childHitSlot, childHits.scoreDocs.length);
+ currentParentID = parentID;
+ childHitSlot = 0;
+ childHits = joinResults.get(parentID);
- if (joinGroupUpto < groupDocs.length) {
- // Advance non-join hit to the next parentID:
- //System.out.println(" next joingroupUpto=" + joinGroupUpto + " gd.length=" + groupDocs.length + " parentID=" + parentID);
- while(true) {
- assertTrue(resultUpto < hits.length);
- if (!parentID.equals(r.document(hits[resultUpto].doc).get("parentID"))) {
- break;
- }
- resultUpto++;
- }
- }
+ String controlChildID = controlDoc.get("childID");
+ Document childDoc = joinR.document(childHits.scoreDocs[childHitSlot++].doc);
+ String childID = childDoc.get("childID");
+ assertEquals(controlChildID, childID);
@@ -1200,43 +1068,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(childJobJoinQuery, Occur.MUST));
fullQuery.add(new BooleanClause(childQualificationJoinQuery, Occur.MUST));
- // Collects all job and qualification child docs for
- // each resume hit in the top N (sorted by score):
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
-, c);
- // Examine "Job" children
- TopGroups jobResults = c.getTopGroups(childJobJoinQuery, null, 0, 10, 0, true);
- //assertEquals(1, results.totalHitCount);
- assertEquals(1, jobResults.totalGroupedHitCount);
- assertEquals(1, jobResults.groups.length);
- final GroupDocs group = jobResults.groups[0];
- assertEquals(1, group.totalHits);
- Document childJobDoc = s.doc(group.scoreDocs[0].doc);
- //System.out.println(" doc=" + group.scoreDocs[0].doc);
- assertEquals("java", childJobDoc.get("skill"));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
+ final TopDocs topDocs =, 10);
+ assertEquals(1, topDocs.totalHits);
+ Document parentDoc = s.doc(topDocs.scoreDocs[0].doc);
assertEquals("Lisa", parentDoc.get("name"));
- // Now Examine qualification children
- TopGroups qualificationResults = c.getTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter,, topDocs.scoreDocs[0].doc);
+ TopDocs matchingChildren =, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
- assertEquals(1, qualificationResults.totalGroupedHitCount);
- assertEquals(1, qualificationResults.groups.length);
- final GroupDocs qGroup = qualificationResults.groups[0];
- assertEquals(1, qGroup.totalHits);
- Document childQualificationDoc = s.doc(qGroup.scoreDocs[0].doc);
- assertEquals("maths", childQualificationDoc.get("qualification"));
- assertNotNull(qGroup.groupValue);
- parentDoc = s.doc(qGroup.groupValue);
- assertEquals("Lisa", parentDoc.get("name"));
+ childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter,, topDocs.scoreDocs[0].doc);
+ matchingChildren =, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("maths", s.doc(matchingChildren.scoreDocs[0].doc).get("qualification"));
@@ -1300,165 +1146,6 @@ public class TestBlockJoin extends LuceneTestCase {
- public void testGetTopGroups() throws Exception {
- final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
- final List docs = new ArrayList<>();
- docs.add(makeJob("ruby", 2005));
- docs.add(makeJob("java", 2006));
- docs.add(makeJob("java", 2010));
- docs.add(makeJob("java", 2012));
- Collections.shuffle(docs, random());
- docs.add(makeResume("Frank", "United States"));
- addSkillless(w);
- w.addDocuments(docs);
- addSkillless(w);
- IndexReader r = w.getReader();
- w.close();
- IndexSearcher s = new IndexSearcher(r);
- // Create a filter that defines "parent" documents in the index - in this case resumes
- BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
- CheckJoinIndex.check(s.getIndexReader(), parentsFilter);
- // Define child document criteria (finds an example of relevant work experience)
- BooleanQuery.Builder childQuery = new BooleanQuery.Builder();
- childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
- childQuery.add(new BooleanClause(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST));
- // Wrap the child document query to 'join' any matches
- // up to corresponding parent:
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(, parentsFilter, ScoreMode.Avg);
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
-, c);
- //Get all child documents within groups
- @SuppressWarnings({"unchecked","rawtypes"})
- TopGroups[] getTopGroupsResults = new TopGroups[2];
- getTopGroupsResults[0] = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
- getTopGroupsResults[1] = c.getTopGroupsWithAllChildDocs(childJoinQuery, null, 0, 0, true);
- for (TopGroups results : getTopGroupsResults) {
- assertFalse(Float.isNaN(results.maxScore));
- assertEquals(2, results.totalGroupedHitCount);
- assertEquals(1, results.groups.length);
- final GroupDocs group = results.groups[0];
- assertEquals(2, group.totalHits);
- assertFalse(Float.isNaN(group.score));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
- assertEquals("Frank", parentDoc.get("name"));
- assertEquals(2, group.scoreDocs.length); //all matched child documents collected
- for (ScoreDoc scoreDoc : group.scoreDocs) {
- Document childDoc = s.doc(scoreDoc.doc);
- assertEquals("java", childDoc.get("skill"));
- int year = Integer.parseInt(childDoc.get("year"));
- assertTrue(year >= 2006 && year <= 2011);
- }
- }
- //Get part of child documents
- TopGroups boundedResults = c.getTopGroups(childJoinQuery, null, 0, 1, 0, true);
- assertFalse(Float.isNaN(boundedResults.maxScore));
- assertEquals(2, boundedResults.totalGroupedHitCount);
- assertEquals(1, boundedResults.groups.length);
- final GroupDocs group = boundedResults.groups[0];
- assertEquals(2, group.totalHits);
- assertFalse(Float.isNaN(group.score));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
- assertEquals("Frank", parentDoc.get("name"));
- assertEquals(1, group.scoreDocs.length); //not all matched child documents collected
- for (ScoreDoc scoreDoc : group.scoreDocs) {
- Document childDoc = s.doc(scoreDoc.doc);
- assertEquals("java", childDoc.get("skill"));
- int year = Integer.parseInt(childDoc.get("year"));
- assertTrue(year >= 2006 && year <= 2011);
- }
- r.close();
- dir.close();
- }
- // LUCENE-4968
- public void testSometimesParentOnlyMatches() throws Exception {
- Directory d = newDirectory();
- RandomIndexWriter w = new RandomIndexWriter(random(), d);
- Document parent = new Document();
- parent.add(new StoredField("parentID", "0"));
- parent.add(new SortedDocValuesField("parentID", new BytesRef("0")));
- parent.add(newTextField("parentText", "text", Field.Store.NO));
- parent.add(newStringField("isParent", "yes", Field.Store.NO));
- List docs = new ArrayList<>();
- Document child = new Document();
- docs.add(child);
- child.add(new StoredField("childID", "0"));
- child.add(newTextField("childText", "text", Field.Store.NO));
- // parent last:
- docs.add(parent);
- w.addDocuments(docs);
- docs.clear();
- parent = new Document();
- parent.add(newTextField("parentText", "text", Field.Store.NO));
- parent.add(newStringField("isParent", "yes", Field.Store.NO));
- parent.add(new StoredField("parentID", "1"));
- parent.add(new SortedDocValuesField("parentID", new BytesRef("1")));
- // parent last:
- docs.add(parent);
- w.addDocuments(docs);
- IndexReader r = w.getReader();
- w.close();
- IndexSearcher searcher = new ToParentBlockJoinIndexSearcher(r);
- Query childQuery = new TermQuery(new Term("childText", "text"));
- BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
- CheckJoinIndex.check(r, parentsFilter);
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
- parentQuery.add(childJoinQuery, Occur.SHOULD);
- parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
- 10, true, true);
-, c);
- TopGroups groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
- // Two parents:
- assertEquals(2, groups.totalGroupCount.intValue());
- // One child docs:
- assertEquals(1, groups.totalGroupedHitCount);
- GroupDocs group = groups.groups[0];
- Document doc = r.document(group.groupValue.intValue());
- assertEquals("0", doc.get("parentID"));
- group = groups.groups[1];
- doc = r.document(group.groupValue.intValue());
- assertEquals("1", doc.get("parentID"));
- r.close();
- d.close();
- }
// LUCENE-4968
public void testChildQueryNeverMatches() throws Exception {
Directory d = newDirectory();
@@ -1496,90 +1183,25 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r = w.getReader();
- IndexSearcher searcher = new ToParentBlockJoinIndexSearcher(r);
+ IndexSearcher searcher = newSearcher(r);
// never matches:
Query childQuery = new TermQuery(new Term("childText", "bogus"));
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
CheckJoinIndex.check(r, parentsFilter);
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
- parentQuery.add(childJoinQuery, Occur.SHOULD);
- parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
- 10, true, true);
-, c);
- TopGroups groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
+ Weight weight = searcher.createNormalizedWeight(childJoinQuery, random().nextBoolean());
+ Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
+ assertNull(scorer);
- // Two parents:
- assertEquals(2, groups.totalGroupCount.intValue());
- // One child docs:
- assertEquals(0, groups.totalGroupedHitCount);
- GroupDocs group = groups.groups[0];
- Document doc = r.document(group.groupValue.intValue());
- assertEquals("0", doc.get("parentID"));
- group = groups.groups[1];
- doc = r.document(group.groupValue.intValue());
- assertEquals("1", doc.get("parentID"));
- r.close();
- d.close();
- }
- // LUCENE-4968
- public void testChildQueryMatchesParent() throws Exception {
- Directory d = newDirectory();
- RandomIndexWriter w = new RandomIndexWriter(random(), d);
- Document parent = new Document();
- parent.add(new StoredField("parentID", "0"));
- parent.add(newTextField("parentText", "text", Field.Store.NO));
- parent.add(newStringField("isParent", "yes", Field.Store.NO));
- List docs = new ArrayList<>();
- Document child = new Document();
- docs.add(child);
- child.add(new StoredField("childID", "0"));
- child.add(newTextField("childText", "text", Field.Store.NO));
- // parent last:
- docs.add(parent);
- w.addDocuments(docs);
- docs.clear();
- parent = new Document();
- parent.add(newTextField("parentText", "text", Field.Store.NO));
- parent.add(newStringField("isParent", "yes", Field.Store.NO));
- parent.add(new StoredField("parentID", "1"));
- // parent last:
- docs.add(parent);
- w.addDocuments(docs);
- IndexReader r = w.getReader();
- w.close();
- // illegally matches parent:
- Query childQuery = new TermQuery(new Term("parentText", "text"));
- BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
- CheckJoinIndex.check(r, parentsFilter);
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
- parentQuery.add(childJoinQuery, Occur.SHOULD);
- parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
- 10, true, true);
- expectThrows(IllegalStateException.class, () -> {
- newSearcher(r).search(, c);
- });
+ // never matches and produces a null scorer
+ childQuery = new TermQuery(new Term("bogus", "bogus"));
+ childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+ weight = searcher.createNormalizedWeight(childJoinQuery, random().nextBoolean());
+ scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
+ assertNull(scorer);
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/ b/lucene/join/src/test/org/apache/lucene/search/join/
index 6e20f232a2b..39979ace2c5 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/
+++ b/lucene/join/src/test/org/apache/lucene/search/join/
@@ -454,59 +454,7 @@ public class TestJoinUtil extends LuceneTestCase {
public void testMinMaxScore() throws Exception {
String priceField = "price";
- // FunctionQuery would be helpful, but join module doesn't depend on queries module.
- Query priceQuery = new Query() {
- private final Query fieldQuery = new FieldValueQuery(priceField);
- @Override
- public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
- Weight fieldWeight = fieldQuery.createWeight(searcher, false, boost);
- return new Weight(this) {
- @Override
- public void extractTerms(Set terms) {
- }
- @Override
- public Explanation explain(LeafReaderContext context, int doc) throws IOException {
- return null;
- }
- @Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
- Scorer fieldScorer = fieldWeight.scorer(context);
- if (fieldScorer == null) {
- return null;
- }
- NumericDocValues price = context.reader().getNumericDocValues(priceField);
- return new FilterScorer(fieldScorer, this) {
- @Override
- public float score() throws IOException {
- assertEquals(in.docID(), price.nextDoc());
- return (float) price.longValue();
- }
- };
- }
- };
- }
- @Override
- public String toString(String field) {
- return fieldQuery.toString(field);
- }
- @Override
- public boolean equals(Object o) {
- return o == this;
- }
- @Override
- public int hashCode() {
- return System.identityHashCode(this);
- }
- };
+ Query priceQuery = numericDocValuesScoreQuery(priceField);
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(
@@ -579,6 +527,62 @@ public class TestJoinUtil extends LuceneTestCase {
+ // A FunctionQuery would be helpful here, but the join module doesn't depend on the queries module.
+ static Query numericDocValuesScoreQuery(final String field) {
+ return new Query() {
+ private final Query fieldQuery = new FieldValueQuery(field);
+ @Override
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+ Weight fieldWeight = fieldQuery.createWeight(searcher, false, boost);
+ return new Weight(this) {
+ @Override
+ public void extractTerms(Set terms) {
+ }
+ @Override
+ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+ return null;
+ }
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ Scorer fieldScorer = fieldWeight.scorer(context);
+ if (fieldScorer == null) {
+ return null;
+ }
+ NumericDocValues price = context.reader().getNumericDocValues(field);
+ return new FilterScorer(fieldScorer, this) {
+ @Override
+ public float score() throws IOException {
+ assertEquals(in.docID(), price.advance(in.docID()));
+ return (float) price.longValue();
+ }
+ };
+ }
+ };
+ }
+ @Override
+ public String toString(String field) {
+ return fieldQuery.toString(field);
+ }
+ @Override
+ public boolean equals(Object o) {
+ return o == this;
+ }
+ @Override
+ public int hashCode() {
+ return System.identityHashCode(this);
+ }
+ };
+ }
public void testMinMaxDocs() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/ b/lucene/join/src/test/org/apache/lucene/search/join/
new file mode 100644
index 00000000000..fe849cbf753
--- /dev/null
+++ b/lucene/join/src/test/org/apache/lucene/search/join/
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.LuceneTestCase;
+public class TestParentChildrenBlockJoinQuery extends LuceneTestCase {
+  /**
+   * Indexes a random number of parent/child blocks, then runs one
+   * {@link ParentChildrenBlockJoinQuery} per parent hit and checks the number of children
+   * returned, the max score and the score of every child hit.
+   *
+   * <p>Each child stores {@code j + 1} in its "score" doc values field and each parent stores its
+   * child count in "num_child_docs"; the assertions below rely on both.
+   */
+  public void testParentChildrenBlockJoinQuery() throws Exception {
+    int numParentDocs = 8 + random().nextInt(8);
+    int maxChildDocsPerParent = 8 + random().nextInt(8);
+
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+    for (int i = 0; i < numParentDocs; i++) {
+      int numChildDocs = random().nextInt(maxChildDocsPerParent);
+      // Children first, parent last: the whole list is added as one block.
+      List<Document> docs = new ArrayList<>(numChildDocs + 1);
+      for (int j = 0; j < numChildDocs; j++) {
+        Document childDoc = new Document();
+        childDoc.add(new StringField("type", "child", Field.Store.NO));
+        childDoc.add(new NumericDocValuesField("score", j + 1));
+        docs.add(childDoc);
+      }
+
+      Document parentDoc = new Document();
+      parentDoc.add(new StringField("type", "parent", Field.Store.NO));
+      parentDoc.add(new NumericDocValuesField("num_child_docs", numChildDocs));
+      docs.add(parentDoc);
+      writer.addDocuments(docs);
+    }
+
+    IndexReader reader = writer.getReader();
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(reader);
+    BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
+    Query childQuery = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("type", "child")), BooleanClause.Occur.FILTER)
+        .add(TestJoinUtil.numericDocValuesScoreQuery("score"), BooleanClause.Occur.SHOULD)
+        .build();
+
+    TopDocs parentDocs = searcher.search(new TermQuery(new Term("type", "parent")), numParentDocs);
+    assertEquals(numParentDocs, parentDocs.scoreDocs.length);
+    for (ScoreDoc parentScoreDoc : parentDocs.scoreDocs) {
+      // Resolve the segment holding this parent so its doc values can be read with a
+      // segment-relative doc id.
+      LeafReaderContext leafReader = reader.leaves().get(ReaderUtil.subIndex(parentScoreDoc.doc, reader.leaves()));
+      NumericDocValues numericDocValuesField = leafReader.reader().getNumericDocValues("num_child_docs");
+      int parentDocInLeaf = parentScoreDoc.doc - leafReader.docBase;
+      // Every parent was indexed with "num_child_docs", so the iterator must land exactly on it.
+      assertEquals(parentDocInLeaf, numericDocValuesField.advance(parentDocInLeaf));
+      long expectedChildDocs = numericDocValuesField.longValue();
+
+      ParentChildrenBlockJoinQuery parentChildrenBlockJoinQuery =
+          new ParentChildrenBlockJoinQuery(parentFilter, childQuery, parentScoreDoc.doc);
+      TopDocs topDocs = searcher.search(parentChildrenBlockJoinQuery, maxChildDocsPerParent);
+      assertEquals(expectedChildDocs, topDocs.totalHits);
+      if (expectedChildDocs > 0) {
+        // The best hit is expected to score expectedChildDocs, and each following hit one less.
+        assertEquals(expectedChildDocs, topDocs.getMaxScore(), 0);
+        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+          ScoreDoc childScoreDoc = topDocs.scoreDocs[i];
+          assertEquals(expectedChildDocs - i, childScoreDoc.score, 0);
+        }
+      }
+    }
+
+    reader.close();
+    dir.close();
+  }
diff --git a/solr/core/src/java/org/apache/solr/search/join/ b/solr/core/src/java/org/apache/solr/search/join/
index 00b3ac4a593..769a9c82300 100644
--- a/solr/core/src/java/org/apache/solr/search/join/
+++ b/solr/core/src/java/org/apache/solr/search/join/
@@ -19,11 +19,9 @@ package;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
* For each collected parent document creates matched block, which is a docSet with matched children and parent doc
@@ -32,8 +30,6 @@ import
class BlockJoinFacetAccsHolder {
private BlockJoinFieldFacetAccumulator[] blockJoinFieldFacetAccumulators;
private boolean firstSegment = true;
- private ChildrenMatchesScorer blockJoinScorer;
- private int[] childDocs = new int[0];
BlockJoinFacetAccsHolder(SolrQueryRequest req) throws IOException {
String[] facetFieldNames = BlockJoinFacetComponentSupport.getChildFacetFields(req);
@@ -61,16 +57,6 @@ class BlockJoinFacetAccsHolder {
- protected void incrementFacets(int parent) throws IOException {
- final int[] docNums = blockJoinScorer.swapChildDocs(childDocs);
- // now we don't
- //includeParentDoc(parent);
- //final int childCountPlusParent = childTracking.getChildCount()+1;
- final int childCountNoParent = blockJoinScorer.getChildCount();
- final SortedIntsAggDocIterator iter = new SortedIntsAggDocIterator(docNums, childCountNoParent, parent);
- countFacets(iter);
- }
/** is not used
protected int[] includeParentDoc(int parent) {
final int[] docNums = ArrayUtil.grow(childTracking.getChildDocs(), childTracking.getChildCount()+1);