LUCENE-6959: Removed ToParentBlockJoinCollector in favour of ParentChildrenBlockJoinQuery, which can return the matching child documents for each parent document.

This query should be executed for each matching parent document after the main query has been executed.
This commit is contained in:
Martijn van Groningen 2017-01-29 21:36:48 +01:00
parent 6f598d2469
commit e327efb676
10 changed files with 488 additions and 1242 deletions

View File

@ -82,6 +82,12 @@ API Changes
IndexWriter#updateDocValues() calls, to prevent calling with non-existent
docValues fields (Ishan Chattopadhyaya, Adrien Grand, Mike McCandless)
* LUCENE-6959: Removed ToParentBlockJoinCollector in favour of
ParentChildrenBlockJoinQuery, which can return the matching child documents for
each parent document. This query should be executed for each matching parent
document after the main query has been executed. (Adrien Grand, Martijn van
Groningen, Mike McCandless)
New Features
* LUCENE-7623: Add FunctionScoreQuery and FunctionMatchQuery (Alan Woodward,

View File

@ -0,0 +1,199 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
/**
* A query that returns all the matching child documents for a specific parent document
* indexed together in the same block. The provided child query determines which
* child documents are returned.
*
* @lucene.experimental
*/
public class ParentChildrenBlockJoinQuery extends Query {

  /** Identifies, per segment, which documents are parent documents. */
  private final BitSetProducer parentFilter;
  /** Decides which of the parent's child documents match. */
  private final Query childQuery;
  /** Top-level (index-wide) doc id of the parent whose children are returned. */
  private final int parentDocId;

  /**
   * Creates a <code>ParentChildrenBlockJoinQuery</code> instance
   *
   * @param parentFilter  A filter identifying parent documents.
   * @param childQuery    A child query that determines which child docs are matching
   * @param parentDocId   The top level doc id of that parent to return children documents for
   */
  public ParentChildrenBlockJoinQuery(BitSetProducer parentFilter, Query childQuery, int parentDocId) {
    this.parentFilter = parentFilter;
    this.childQuery = childQuery;
    this.parentDocId = parentDocId;
  }

  /** Two instances are equal when parent filter, child query and parent doc id are all equal. */
  @Override
  public boolean equals(Object obj) {
    if (sameClassAs(obj) == false) {
      return false;
    }
    ParentChildrenBlockJoinQuery other = (ParentChildrenBlockJoinQuery) obj;
    return parentFilter.equals(other.parentFilter)
        && childQuery.equals(other.childQuery)
        && parentDocId == other.parentDocId;
  }

  /** Hash over the same three components that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    int hash = classHash();
    hash = 31 * hash + parentFilter.hashCode();
    hash = 31 * hash + childQuery.hashCode();
    hash = 31 * hash + parentDocId;
    return hash;
  }

  @Override
  public String toString(String field) {
    return "ParentChildrenBlockJoinQuery (" + childQuery + ")";
  }

  /**
   * Rewrites the wrapped child query; a new instance is only created when the child query
   * actually changed, otherwise the default rewrite is used.
   */
  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    final Query childRewrite = childQuery.rewrite(reader);
    if (childRewrite != childQuery) {
      return new ParentChildrenBlockJoinQuery(parentFilter, childRewrite, parentDocId);
    } else {
      return super.rewrite(reader);
    }
  }

  /**
   * Creates a weight whose scorer only produces hits in the segment that holds the parent
   * document, and only for child documents located in that parent's block.
   */
  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    final Weight childWeight = childQuery.createWeight(searcher, needsScores, boost);
    // Index (ord) of the leaf that contains the top-level parent doc id.
    final int readerIndex = ReaderUtil.subIndex(parentDocId, searcher.getIndexReader().leaves());
    return new Weight(this) {

      @Override
      public void extractTerms(Set<Term> terms) {
        childWeight.extractTerms(terms);
      }

      @Override
      public Explanation explain(LeafReaderContext context, int doc) throws IOException {
        return Explanation.noMatch("Not implemented, use ToParentBlockJoinQuery explain why a document matched");
      }

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        // Child docs only reside in a single segment, so no need to evaluate all segments
        if (context.ord != readerIndex) {
          return null;
        }

        final int localParentDocId = parentDocId - context.docBase;
        // If localParentDocId == 0 then the parent doc can't have child docs: child docs are stored
        // before their parent doc, and nothing can precede the first doc of the segment.
        if (localParentDocId == 0) {
          return null;
        }

        final BitSet parents = parentFilter.getBitSet(context);
        // BitSetProducer#getBitSet is documented as possibly returning null when the segment has no
        // matching (parent) docs. Without any known parent there is no block to iterate, so report
        // "no matches" instead of failing with a NullPointerException below.
        if (parents == null) {
          return null;
        }

        // The block starts right after the previous parent (or at doc 0 when prevSetBit yields -1).
        final int firstChildDocId = parents.prevSetBit(localParentDocId - 1) + 1;
        // A parent doc doesn't have child docs, so we can early exit here:
        if (firstChildDocId == localParentDocId) {
          return null;
        }

        final Scorer childrenScorer = childWeight.scorer(context);
        if (childrenScorer == null) {
          return null;
        }

        final DocIdSetIterator childrenIterator = childrenScorer.iterator();
        // Restricts the child iterator to the doc id range [firstChildDocId, localParentDocId).
        final DocIdSetIterator it = new DocIdSetIterator() {

          int doc = -1;

          @Override
          public int docID() {
            return doc;
          }

          @Override
          public int nextDoc() throws IOException {
            return advance(doc + 1);
          }

          @Override
          public int advance(int target) throws IOException {
            // Never start before the first child of this parent's block.
            target = Math.max(firstChildDocId, target);
            if (target >= localParentDocId) {
              // We're outside the child nested scope, so it is done
              return doc = NO_MORE_DOCS;
            } else {
              int advanced = childrenIterator.advance(target);
              if (advanced >= localParentDocId) {
                // We're outside the child nested scope, so it is done
                return doc = NO_MORE_DOCS;
              } else {
                return doc = advanced;
              }
            }
          }

          @Override
          public long cost() {
            // Bounded by both the child iterator's cost and the size of the block.
            return Math.min(childrenIterator.cost(), localParentDocId - firstChildDocId);
          }
        };

        // Scores and freqs are delegated to the child scorer; only iteration is restricted.
        return new Scorer(this) {
          @Override
          public int docID() {
            return it.docID();
          }

          @Override
          public float score() throws IOException {
            return childrenScorer.score();
          }

          @Override
          public int freq() throws IOException {
            return childrenScorer.freq();
          }

          @Override
          public DocIdSetIterator iterator() {
            return it;
          }
        };
      }
    };
  }
}

View File

@ -1,507 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldValueHitQueue;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreCachingWrappingScorer;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Scorer.ChildScorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.ArrayUtil;
/** Collects parent document hits for a Query containing one or more
* BlockJoinQuery clauses, sorted by the
* specified parent Sort. Note that this cannot perform
* arbitrary joins; rather, it requires that all joined
* documents are indexed as a doc block (using {@link
* IndexWriter#addDocuments} or {@link
* IndexWriter#updateDocuments}). Ie, the join is computed
* at index time.
*
* <p>This collector MUST be used with {@link ToParentBlockJoinIndexSearcher},
* in order to work correctly.
*
* <p>The parent Sort must only use
* fields from the parent documents; sorting by field in
* the child documents is not supported.</p>
*
* <p>You should only use this
* collector if one or more of the clauses in the query is
* a {@link ToParentBlockJoinQuery}. This collector will find those query
* clauses and record the matching child documents for the
* top scoring parent documents.</p>
*
* <p>Multiple joins (star join) and nested joins and a mix
* of the two are allowed, as long as in all cases the
* documents corresponding to a single row of each joined
* parent table were indexed as a doc block.</p>
*
* <p>For the simple star join you can retrieve the
* {@link TopGroups} instance containing each {@link ToParentBlockJoinQuery}'s
* matching child documents for the top parent groups,
* using {@link #getTopGroups}. Ie,
* a single query, which will contain two or more
* {@link ToParentBlockJoinQuery}'s as clauses representing the star join,
* can then retrieve two or more {@link TopGroups} instances.</p>
*
* <p>For nested joins, the query will run correctly (ie,
* match the right parent and child documents), however,
* because TopGroups is currently unable to support nesting
* (each group is not able to hold another TopGroups), you
* are only able to retrieve the TopGroups of the first
* join. The TopGroups of the nested joins will not be
* correct.
*
* See {@link org.apache.lucene.search.join} for a code
* sample.
*
* @lucene.experimental
*/
public class ToParentBlockJoinCollector implements Collector {
// Sort applied to parent hits; used to build the hit queue and reported in the returned TopGroups.
private final Sort sort;
// Maps each BlockJoinQuery instance to its "slot" in
// joinScorers and in OneGroup's cached doc/scores/count:
private final Map<Query,Integer> joinQueryID = new HashMap<>();
// Capacity of the parent hit queue; the queue counts as full once this many hits were collected.
private final int numParentHits;
// Priority queue holding the currently competitive parent groups.
private final FieldValueHitQueue<OneGroup> queue;
// Top-level comparators of the queue, used to read back sort values in accumulateGroups.
private final FieldComparator<?>[] comparators;
private final boolean trackMaxScore;
private final boolean trackScores;
// One entry per enrolled join query (indexed by slot); cleared at the start of every setScorer call.
private ToParentBlockJoinQuery.BlockJoinScorer[] joinScorers = new ToParentBlockJoinQuery.BlockJoinScorer[0];
private boolean queueFull;
// Entry most recently returned by queue.add / queue.updateTop.
private OneGroup bottom;
// Number of times collect() was invoked, across all segments.
private int totalHitCount;
// Stays NaN unless trackMaxScore was requested.
private float maxScore = Float.NaN;
/** Creates a ToParentBlockJoinCollector. The provided sort must
* not be null. If you pass true trackScores, all
* ToParentBlockJoinQuery instances must not use
* ScoreMode.None. */
public ToParentBlockJoinCollector(Sort sort, int numParentHits, boolean trackScores, boolean trackMaxScore) {
// TODO: allow null sort to be specialized to relevance
// only collector
this.sort = sort;
this.trackMaxScore = trackMaxScore;
if (trackMaxScore) {
// NOTE(review): Float.MIN_VALUE is the smallest positive float, not the most negative
// one - confirm this is the intended starting value for the running maximum.
maxScore = Float.MIN_VALUE;
}
//System.out.println("numParentHits=" + numParentHits);
this.trackScores = trackScores;
this.numParentHits = numParentHits;
queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
comparators = queue.getComparators();
}
/**
* Queue entry for one parent hit. In addition to the parent doc/score it stores, per join
* slot, the child doc ids, the child scores (only when scores are tracked) and the number
* of valid entries in those arrays.
*/
private static final class OneGroup extends FieldValueHitQueue.Entry {
public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
super(comparatorSlot, parentDoc, parentScore);
//System.out.println("make OneGroup parentDoc=" + parentDoc);
// Every join slot starts with room for 5 child docs.
docs = new int[numJoins][];
for(int joinID=0;joinID<numJoins;joinID++) {
docs[joinID] = new int[5];
}
if (doScores) {
scores = new float[numJoins][];
for(int joinID=0;joinID<numJoins;joinID++) {
scores[joinID] = new float[5];
}
}
counts = new int[numJoins];
}
// Segment in which the parent doc was collected; used later to obtain a leaf collector for the children.
LeafReaderContext readerContext;
// docs[slot] holds child doc ids, of which the first counts[slot] are valid.
int[][] docs;
// scores[slot] parallels docs[slot]; left null when the group was created with doScores=false.
float[][] scores;
int[] counts;
}
/**
* Returns the per-segment collector. It keeps the top parent hits according to the parent
* sort and, for every hit that enters the queue, copies the child docs (and scores) that
* the enrolled block join scorers recorded for that parent.
*/
@Override
public LeafCollector getLeafCollector(final LeafReaderContext context)
throws IOException {
final LeafFieldComparator[] comparators = queue.getComparators(context);
final int[] reverseMul = queue.getReverseMul();
final int docBase = context.docBase;
return new LeafCollector() {
private Scorer scorer;
/**
* Caches the scorer, hands it to the comparators and walks the scorer tree (via
* getChildren) to enroll every BlockJoinScorer found in it.
*/
@Override
public void setScorer(Scorer scorer) throws IOException {
//System.out.println("C.setScorer scorer=" + scorer);
// Since we invoke .score(), and the comparators likely
// do as well, cache it so it's only "really" computed
// once:
if (scorer instanceof ScoreCachingWrappingScorer == false) {
scorer = new ScoreCachingWrappingScorer(scorer);
}
this.scorer = scorer;
for (LeafFieldComparator comparator : comparators) {
comparator.setScorer(scorer);
}
// Forget the scorers enrolled for the previous segment; slots themselves are kept.
Arrays.fill(joinScorers, null);
// Breadth-first traversal of the scorer tree (this local queue shadows the hit queue field).
Queue<Scorer> queue = new LinkedList<>();
//System.out.println("\nqueue: add top scorer=" + scorer);
queue.add(scorer);
while ((scorer = queue.poll()) != null) {
//System.out.println(" poll: " + scorer + "; " + scorer.getWeight().getQuery());
if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
}
for (ChildScorer sub : scorer.getChildren()) {
//System.out.println(" add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
queue.add(sub.child);
}
}
}
/**
* Collects one parent hit: updates the hit count and max score, then either fills the
* queue (startup phase) or replaces the bottom entry when the hit is competitive.
*/
@Override
public void collect(int parentDoc) throws IOException {
//System.out.println("\nC parentDoc=" + parentDoc);
totalHitCount++;
float score = Float.NaN;
if (trackMaxScore) {
score = scorer.score();
maxScore = Math.max(maxScore, score);
}
// TODO: we could sweep all joinScorers here and
// aggregate total child hit count, so we can fill this
// in getTopGroups (we wire it to 0 now)
if (queueFull) {
//System.out.println(" queueFull");
// Fastmatch: return if this hit is not competitive
int c = 0;
for (int i = 0; i < comparators.length; ++i) {
c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
if (c != 0) {
break;
}
}
if (c <= 0) { // in case of equality, this hit is not competitive as docs are visited in order
// Definitely not competitive.
//System.out.println(" skip");
return;
}
//System.out.println(" competes! doc=" + (docBase + parentDoc));
// This hit is competitive - replace bottom element in queue & adjustTop
for (LeafFieldComparator comparator : comparators) {
comparator.copy(bottom.slot, parentDoc);
}
// When trackMaxScore is set the score was already computed above.
if (!trackMaxScore && trackScores) {
score = scorer.score();
}
// The bottom entry object is reused for the new hit.
bottom.doc = docBase + parentDoc;
bottom.readerContext = context;
bottom.score = score;
copyGroups(bottom);
bottom = queue.updateTop();
for (LeafFieldComparator comparator : comparators) {
comparator.setBottom(bottom.slot);
}
} else {
// Startup transient: queue is not yet full:
final int comparatorSlot = totalHitCount - 1;
// Copy hit into queue
for (LeafFieldComparator comparator : comparators) {
comparator.copy(comparatorSlot, parentDoc);
}
//System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
if (!trackMaxScore && trackScores) {
score = scorer.score();
}
final OneGroup og = new OneGroup(comparatorSlot, docBase+parentDoc, score, joinScorers.length, trackScores);
og.readerContext = context;
copyGroups(og);
bottom = queue.add(og);
queueFull = totalHitCount == numParentHits;
if (queueFull) {
// End of startup transient: queue just filled up:
for (LeafFieldComparator comparator : comparators) {
comparator.setBottom(bottom.slot);
}
}
}
}
// Pulls out child doc and scores for all join queries:
private void copyGroups(OneGroup og) {
// While rare, it's possible top arrays could be too
// short if join query had null scorer on first
// segment(s) but then became non-null on later segments
final int numSubScorers = joinScorers.length;
if (og.docs.length < numSubScorers) {
// While rare, this could happen if join query had
// null scorer on first segment(s) but then became
// non-null on later segments
og.docs = ArrayUtil.grow(og.docs, numSubScorers);
}
if (og.counts.length < numSubScorers) {
// NOTE(review): docs and scores are grown to numSubScorers explicitly, but counts uses
// the single-argument grow - confirm the resulting length is always >= numSubScorers.
og.counts = ArrayUtil.grow(og.counts);
}
if (trackScores && og.scores.length < numSubScorers) {
og.scores = ArrayUtil.grow(og.scores, numSubScorers);
}
//System.out.println("\ncopyGroups parentDoc=" + og.doc);
for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
final ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
//System.out.println(" scorer=" + joinScorer);
// Only take children from a join scorer that is positioned on this very parent doc.
if (joinScorer != null && docBase + joinScorer.getParentDoc() == og.doc) {
og.counts[scorerIDX] = joinScorer.getChildCount();
//System.out.println(" count=" + og.counts[scorerIDX]);
// Exchange arrays with the scorer: the group receives the scorer's recorded child docs
// and the scorer receives the group's previous array in return.
og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
assert og.docs[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.docs[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
//System.out.println(" len=" + og.docs[scorerIDX].length);
/*
for(int idx=0;idx<og.counts[scorerIDX];idx++) {
System.out.println(" docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
}
*/
if (trackScores) {
//System.out.println(" copy scores");
og.scores[scorerIDX] = joinScorer.swapChildScores(og.scores[scorerIDX]);
assert og.scores[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
}
} else {
// No children recorded by this join for the parent.
og.counts[scorerIDX] = 0;
}
}
}
};
}
/**
* Registers a block join scorer for the current segment. The scorer is told to record its
* child hits; the first time a query is seen it gets a new slot appended to joinScorers,
* afterwards its existing slot is reused.
*/
private void enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer) {
scorer.trackPendingChildHits();
final Integer slot = joinQueryID.get(query);
if (slot == null) {
joinQueryID.put(query, joinScorers.length);
//System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
final ToParentBlockJoinQuery.BlockJoinScorer[] newArray = new ToParentBlockJoinQuery.BlockJoinScorer[1+joinScorers.length];
System.arraycopy(joinScorers, 0, newArray, 0, joinScorers.length);
joinScorers = newArray;
joinScorers[joinScorers.length-1] = scorer;
} else {
joinScorers[slot] = scorer;
}
}
// Queue contents in final order; null until the first getTopGroups call that reaches sortQueue().
private OneGroup[] sortedGroups;
/** Empties the hit queue into sortedGroups, filling the array from the last index down to 0. */
private void sortQueue() {
sortedGroups = new OneGroup[queue.size()];
for(int downTo=queue.size()-1;downTo>=0;downTo--) {
sortedGroups[downTo] = queue.pop();
}
}
/** Returns the TopGroups for the specified
* BlockJoinQuery. The groupValue of each GroupDocs will
* be the parent docID for that group.
* The number of documents within each group is calculated as minimum of <code>maxDocsPerGroup</code>
* and number of matched child documents for that group.
* Returns null if no groups matched.
*
* @param query Search query
* @param withinGroupSort Sort criteria within groups
* @param offset Parent docs offset
* @param maxDocsPerGroup Upper bound of documents per group number
* @param withinGroupOffset Offset within each group of child docs
* @param fillSortFields Specifies whether to add sort fields or not
* @return TopGroups for specified query
* @throws IOException if there is a low-level I/O error
*/
public TopGroups<Integer> getTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset,
int maxDocsPerGroup, int withinGroupOffset, boolean fillSortFields)
throws IOException {
final Integer _slot = joinQueryID.get(query);
if (_slot == null && totalHitCount == 0) {
return null;
}
if (sortedGroups == null) {
if (offset >= queue.size()) {
return null;
}
sortQueue();
// NOTE(review): the first call rejects offset >= size while later calls only reject
// offset > length - confirm whether the difference is intended.
} else if (offset > sortedGroups.length) {
return null;
}
// A query that was never enrolled is passed on as slot -1 (every group then has zero children).
return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
}
/**
* Accumulates groups for the BlockJoinQuery specified by its slot.
*
* @param slot Search query's slot
* @param offset Parent docs offset
* @param maxDocsPerGroup Upper bound of documents per group number
* @param withinGroupOffset Offset within each group of child docs
* @param withinGroupSort Sort criteria within groups
* @param fillSortFields Specifies whether to add sort fields or not
* @return TopGroups for the query specified by slot
* @throws IOException if there is a low-level I/O error
*/
@SuppressWarnings({"unchecked","rawtypes"})
private TopGroups<Integer> accumulateGroups(int slot, int offset, int maxDocsPerGroup,
int withinGroupOffset, Sort withinGroupSort, boolean fillSortFields) throws IOException {
final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
// Stand-in scorer through which the recorded child doc/score pairs are replayed into a collector.
final FakeScorer fakeScorer = new FakeScorer();
int totalGroupedHitCount = 0;
//System.out.println("slot=" + slot);
for(int groupIDX=offset;groupIDX<sortedGroups.length;groupIDX++) {
final OneGroup og = sortedGroups[groupIDX];
final int numChildDocs;
if (slot == -1 || slot >= og.counts.length) {
numChildDocs = 0;
} else {
numChildDocs = og.counts[slot];
}
// Number of documents in group should be bounded to prevent redundant memory allocation
// (and is at least 1, so a collector is created even for a group without children)
final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
//System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
// At this point we hold all docs w/ in each group,
// unsorted; we now sort them:
final TopDocsCollector<?> collector;
if (withinGroupSort == null) {
//System.out.println("sort by score");
// Sort by score
if (!trackScores) {
throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
}
collector = TopScoreDocCollector.create(numDocsInGroup);
} else {
// Sort by fields
collector = TopFieldCollector.create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore);
}
LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
leafCollector.setScorer(fakeScorer);
// Replay the recorded children of this group into the within-group collector.
for(int docIDX=0;docIDX<numChildDocs;docIDX++) {
//System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
final int doc = og.docs[slot][docIDX];
fakeScorer.doc = doc;
if (trackScores) {
fakeScorer.score = og.scores[slot][docIDX];
}
leafCollector.collect(doc);
}
totalGroupedHitCount += numChildDocs;
final Object[] groupSortValues;
if (fillSortFields) {
groupSortValues = new Object[comparators.length];
for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
}
} else {
groupSortValues = null;
}
final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);
groups[groupIDX-offset] = new GroupDocs<>(og.score,
topDocs.getMaxScore(),
numChildDocs,
topDocs.scoreDocs,
og.doc,
groupSortValues);
}
// The inner TopGroups carries the groups; the outer one adds totalHitCount as its second argument.
return new TopGroups<>(new TopGroups<>(sort.getSort(),
withinGroupSort == null ? null : withinGroupSort.getSort(),
0, totalGroupedHitCount, groups, maxScore),
totalHitCount);
}
/** Returns the TopGroups for the specified BlockJoinQuery.
* The groupValue of each GroupDocs will be the parent docID for that group.
* The number of documents within each group
* equals to the total number of matched child documents for that group.
* Returns null if no groups matched.
*
* @param query Search query
* @param withinGroupSort Sort criteria within groups
* @param offset Parent docs offset
* @param withinGroupOffset Offset within each group of child docs
* @param fillSortFields Specifies whether to add sort fields or not
* @return TopGroups for specified query
* @throws IOException if there is a low-level I/O error
*/
public TopGroups<Integer> getTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset,
int withinGroupOffset, boolean fillSortFields)
throws IOException {
// Integer.MAX_VALUE as maxDocsPerGroup effectively disables the per-group cap.
return getTopGroups(query, withinGroupSort, offset, Integer.MAX_VALUE, withinGroupOffset, fillSortFields);
}
/**
* Returns the highest score across all collected parent hits, as long as
* <code>trackMaxScores=true</code> was passed
* {@link #ToParentBlockJoinCollector(Sort, int, boolean, boolean) on
* construction}. Else, this returns <code>Float.NaN</code>
*/
public float getMaxScore() {
return maxScore;
}
/** Always reports that scores are needed, regardless of the trackScores setting. */
@Override
public boolean needsScores() {
// needed so that eg. BooleanQuery does not rewrite its MUST clauses to
// FILTER since the filter scorers are hidden in Scorer.getChildren().
return true;
}
}

View File

@ -1,73 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.ExecutorService;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
/**
* An {@link IndexSearcher} to use in conjunction with
* {@link ToParentBlockJoinCollector}.
*/
public class ToParentBlockJoinIndexSearcher extends IndexSearcher {

  /**
   * Creates a searcher over the given reader whose per-segment searches are
   * run in the provided {@link ExecutorService}.
   * @see IndexSearcher#IndexSearcher(IndexReader, ExecutorService)
   */
  public ToParentBlockJoinIndexSearcher(IndexReader r, ExecutorService executor) {
    super(r, executor);
  }

  /**
   * Creates a searcher over the given reader.
   * @see IndexSearcher#IndexSearcher(IndexReader)
   */
  public ToParentBlockJoinIndexSearcher(IndexReader r) {
    super(r);
  }

  /**
   * Searches every leaf by pulling a plain {@link Scorer} from the weight and feeding
   * each live matching document to the leaf collector.
   */
  @Override
  protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
    for (LeafReaderContext leaf : leaves) {
      // A Scorer (rather than a BulkScorer) is used on purpose: the scorer handed to
      // LeafCollector.setScorer must support Scorer.getChildren.
      final Scorer leafScorer = weight.scorer(leaf);
      if (leafScorer == null) {
        // Nothing matches in this segment.
        continue;
      }

      final LeafCollector leafCollector = collector.getLeafCollector(leaf);
      leafCollector.setScorer(leafScorer);

      final Bits liveDocs = leaf.reader().getLiveDocs();
      final DocIdSetIterator matches = leafScorer.iterator();
      int doc;
      while ((doc = matches.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        // A null liveDocs means no document of the segment is deleted.
        if (liveDocs != null && liveDocs.get(doc) == false) {
          continue;
        }
        leafCollector.collect(doc);
      }
    }
  }
}

View File

@ -30,8 +30,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSet;
/**
@ -57,20 +55,6 @@ import org.apache.lucene.util.BitSet;
* documents: the wrapped child query must never
* return a parent document.</p>
*
* If you'd like to retrieve {@link TopGroups} for the
* resulting query, use the {@link ToParentBlockJoinCollector}.
* Note that this is not necessary, ie, if you simply want
* to collect the parent documents and don't need to see
* which child documents matched under that parent, then
* you can use any collector.
*
* <p><b>NOTE</b>: If the overall query contains parent-only
* matches, for example you OR a parent-only query with a
* joined child-only query, then the resulting collected documents
* will be correct, however the {@link TopGroups} you get
* from {@link ToParentBlockJoinCollector} will not contain every
* child for parents that had matched.
*
* <p>See {@link org.apache.lucene.search.join} for an
* overview. </p>
*
@ -171,39 +155,7 @@ public class ToParentBlockJoinQuery extends Query {
}
}
/**
* Ascendant for {@link ToParentBlockJoinQuery}'s scorer.
* @lucene.experimental it might be removed at <b>6.0</b>
* */
public static abstract class ChildrenMatchesScorer extends Scorer{
/**
* Passes the weight on to the {@link Scorer} constructor.
*
* @param weight the weight handed to the superclass
*/
protected ChildrenMatchesScorer(Weight weight) {
super(weight);
}
/**
* Enables recording of the matching child documents.
*/
public abstract void trackPendingChildHits() ;
/**
* Reports how many matching child documents were recorded.
*
* @return number of recorded matched children docs
*/
public abstract int getChildCount() ;
/**
* Hands out the recorded matching child documents.
*
* @param other array for recording matching children docs of next parent,
* it might be null (that's slower) or the same array which was returned
* from the previous call
* @return array with {@link #getChildCount()} matched children docnums
*/
public abstract int[] swapChildDocs(int[] other);
}
static class BlockJoinScorer extends ChildrenMatchesScorer {
static class BlockJoinScorer extends Scorer {
private final Scorer childScorer;
private final BitSet parentBits;
private final ScoreMode scoreMode;
@ -212,8 +164,6 @@ public class ToParentBlockJoinQuery extends Query {
private float parentScore;
private int parentFreq;
private int nextChildDoc;
private int[] pendingChildDocs;
private float[] pendingChildScores;
private int childDocUpto;
public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
@ -230,39 +180,6 @@ public class ToParentBlockJoinQuery extends Query {
return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
}
/**
* Returns the current value of the <code>childDocUpto</code> counter.
* NOTE(review): presumably the number of child docs gathered for the current parent -
* confirm where the counter is reset.
*/
@Override
public int getChildCount() {
return childDocUpto;
}
/** Package-private accessor for this scorer's <code>parentDoc</code> field. */
int getParentDoc() {
return parentDoc;
}
@Override
public int[] swapChildDocs(int[] other) {
  // Hand the currently recorded child docs to the caller and continue recording into
  // the caller's array, or into a fresh 5-element array when none was supplied.
  final int[] recorded = pendingChildDocs;
  pendingChildDocs = (other != null) ? other : new int[5];
  return recorded;
}
float[] swapChildScores(float[] other) {
  // Child scores are not available when scoring is disabled for this join.
  if (scoreMode == ScoreMode.None) {
    throw new IllegalStateException("ScoreMode is None; you must pass trackScores=false to ToParentBlockJoinCollector");
  }
  // Hand the currently recorded child scores to the caller and continue recording into
  // the caller's array, or into a fresh 5-element array when none was supplied.
  final float[] recorded = pendingChildScores;
  pendingChildScores = (other != null) ? other : new float[5];
  return recorded;
}
@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
@ -297,22 +214,10 @@ public class ToParentBlockJoinQuery extends Query {
do {
//System.out.println(" c=" + nextChildDoc);
if (pendingChildDocs != null && pendingChildDocs.length == childDocUpto) {
pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
}
if (pendingChildScores != null && scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
pendingChildScores = ArrayUtil.grow(pendingChildScores);
}
if (pendingChildDocs != null) {
pendingChildDocs[childDocUpto] = nextChildDoc;
}
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
final int childFreq = childScorer.freq();
if (pendingChildScores != null) {
pendingChildScores[childDocUpto] = childScore;
}
maxScore = Math.max(childScore, maxScore);
minScore = Math.min(childScore, minScore);
totalScore += childScore;
@ -440,17 +345,6 @@ public class ToParentBlockJoinQuery extends Query {
"Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
);
}
/**
 * Turns on recording of matching child hits so they can be retrieved later: allocates the
 * child docID buffer and, unless the score mode is {@link ScoreMode#None}, the child score
 * buffer as well. Both buffers start with five slots.
 */
@Override
public void trackPendingChildHits() {
  pendingChildDocs = new int[5];
  if (scoreMode == ScoreMode.None) {
    // Child scores are never computed in this mode, so no score buffer is needed.
    return;
  }
  pendingChildScores = new float[5];
}
}
@Override

View File

@ -41,14 +41,25 @@
* resulting query can then be used as a clause in any query that
* matches parent.</p>
*
* <p>If you only care about the parent documents matching the query, you
* can use any collector to collect the parent hits, but if you'd also
* like to see which child documents match for each parent document,
* use the {@link org.apache.lucene.search.join.ToParentBlockJoinCollector} to collect the hits. Once the
* search is done, you retrieve a {@link
* org.apache.lucene.search.grouping.TopGroups} instance from the
* {@link org.apache.lucene.search.join.ToParentBlockJoinCollector#getTopGroups ToParentBlockJoinCollector.getTopGroups()} method.</p>
*
* <p>If you care about which child documents matched for each parent document,
* then use the {@link org.apache.lucene.search.join.ParentChildrenBlockJoinQuery} query to
* retrieve, per matched parent document, the child documents that caused the
* parent document to match in the first place. This query should be used after your main query
* has been executed. For each hit, execute the
* {@link org.apache.lucene.search.join.ParentChildrenBlockJoinQuery} query:</p>
* <pre class="prettyprint">
* TopDocs results = searcher.search(mainQuery, 10);
* for (int i = 0; i &lt; results.scoreDocs.length; i++) {
* ScoreDoc scoreDoc = results.scoreDocs[i];
*
* // Run ParentChildrenBlockJoinQuery to figure out the top matching child docs:
* ParentChildrenBlockJoinQuery parentChildrenBlockJoinQuery =
* new ParentChildrenBlockJoinQuery(parentFilter, childQuery, scoreDoc.doc);
* TopDocs topChildResults = searcher.search(parentChildrenBlockJoinQuery, 3);
* // Process top child hits...
* }
* </pre>
*
* <p>To map/join in the opposite direction, use {@link
* org.apache.lucene.search.join.ToChildBlockJoinQuery}. This wraps
* any query matching parent documents, creating the joined query

View File

@ -22,15 +22,18 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
@ -47,30 +50,8 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.RandomApproximationQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
@ -157,7 +138,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r = DirectoryReader.open(w);
w.close();
IndexSearcher s = new IndexSearcher(r);
IndexSearcher s = newSearcher(r);
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
CheckJoinIndex.check(r, parentsFilter);
@ -170,18 +151,21 @@ public class TestBlockJoin extends LuceneTestCase {
BooleanQuery.Builder fullQuery = new BooleanQuery.Builder();
fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
fullQuery.add(new BooleanClause(new MatchAllDocsQuery(), Occur.MUST));
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
s.search(fullQuery.build(), c);
TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
assertFalse(Float.isNaN(results.maxScore));
assertEquals(1, results.totalGroupedHitCount);
assertEquals(1, results.groups.length);
final GroupDocs<Integer> group = results.groups[0];
Document childDoc = s.doc(group.scoreDocs[0].doc);
assertEquals("java", childDoc.get("skill"));
assertNotNull(group.groupValue);
Document parentDoc = s.doc(group.groupValue);
assertEquals("Lisa", parentDoc.get("name"));
TopDocs topDocs = s.search(fullQuery.build(), 2);
assertEquals(2, topDocs.totalHits);
assertEquals(asSet("Lisa", "Frank"),
asSet(s.doc(topDocs.scoreDocs[0].doc).get("name"), s.doc(topDocs.scoreDocs[1].doc).get("name")));
ParentChildrenBlockJoinQuery childrenQuery =
new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[0].doc);
TopDocs matchingChildren = s.search(childrenQuery, 1);
assertEquals(1, matchingChildren.totalHits);
assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[1].doc);
matchingChildren = s.search(childrenQuery, 1);
assertEquals(1, matchingChildren.totalHits);
assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
r.close();
dir.close();
@ -207,8 +191,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r = w.getReader();
w.close();
IndexSearcher s = new ToParentBlockJoinIndexSearcher(r);
//IndexSearcher s = newSearcher(r, false);
IndexSearcher s = newSearcher(r, false);
//IndexSearcher s = new IndexSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
@ -232,23 +215,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(parentQuery, Occur.SHOULD));
fullQuery.add(new BooleanClause(childJoinQuery, Occur.SHOULD));
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
s.search(fullQuery.build(), c);
TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
assertEquals(1, results.totalGroupedHitCount);
assertEquals(1, results.groups.length);
final TopDocs topDocs = s.search(fullQuery.build(), 2);
assertEquals(2, topDocs.totalHits);
assertEquals(asSet("Lisa", "Frank"),
asSet(s.doc(topDocs.scoreDocs[0].doc).get("name"), s.doc(topDocs.scoreDocs[1].doc).get("name")));
final GroupDocs<Integer> group = results.groups[0];
assertEquals(1, group.totalHits);
assertFalse(Float.isNaN(group.score));
ParentChildrenBlockJoinQuery childrenQuery =
new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[0].doc);
TopDocs matchingChildren = s.search(childrenQuery, 1);
assertEquals(1, matchingChildren.totalHits);
assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
Document childDoc = s.doc(group.scoreDocs[0].doc);
//System.out.println(" doc=" + group.scoreDocs[0].doc);
assertEquals("java", childDoc.get("skill"));
assertNotNull(group.groupValue);
Document parentDoc = s.doc(group.groupValue);
assertEquals("Lisa", parentDoc.get("name"));
childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[1].doc);
matchingChildren = s.search(childrenQuery, 1);
assertEquals(1, matchingChildren.totalHits);
assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
r.close();
dir.close();
@ -297,30 +278,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(parentQuery, Occur.MUST));
fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
CheckHits.checkHitCollector(random(), fullQuery.build(), "country", s, new int[] {2});
s.search(fullQuery.build(), c);
TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
assertFalse(Float.isNaN(results.maxScore));
TopDocs topDocs = s.search(fullQuery.build(), 1);
//assertEquals(1, results.totalHitCount);
assertEquals(1, results.totalGroupedHitCount);
assertEquals(1, results.groups.length);
final GroupDocs<Integer> group = results.groups[0];
assertEquals(1, group.totalHits);
assertFalse(Float.isNaN(group.score));
Document childDoc = s.doc(group.scoreDocs[0].doc);
//System.out.println(" doc=" + group.scoreDocs[0].doc);
assertEquals("java", childDoc.get("skill"));
assertNotNull(group.groupValue);
Document parentDoc = s.doc(group.groupValue);
assertEquals(1, topDocs.totalHits);
Document parentDoc = s.doc(topDocs.scoreDocs[0].doc);
assertEquals("Lisa", parentDoc.get("name"));
ParentChildrenBlockJoinQuery childrenQuery =
new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[0].doc);
TopDocs matchingChildren = s.search(childrenQuery, 1);
assertEquals(1, matchingChildren.totalHits);
assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
//System.out.println("TEST: now test up");
@ -333,7 +305,7 @@ public class TestBlockJoin extends LuceneTestCase {
//System.out.println("FULL: " + fullChildQuery);
TopDocs hits = s.search(fullChildQuery.build(), 10);
assertEquals(1, hits.totalHits);
childDoc = s.doc(hits.scoreDocs[0].doc);
Document childDoc = s.doc(hits.scoreDocs[0].doc);
//System.out.println("CHILD = " + childDoc + " docID=" + hits.scoreDocs[0].doc);
assertEquals("java", childDoc.get("skill"));
assertEquals(2007, childDoc.getField("year").numericValue());
@ -347,72 +319,6 @@ public class TestBlockJoin extends LuceneTestCase {
dir.close();
}
// Regression test: wraps a child query that needs two rewrite passes in a
// ToParentBlockJoinQuery and checks that ToParentBlockJoinCollector still
// reports exactly one matching child per parent group.
public void testBugCausedByRewritingTwice() throws IOException {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
final List<Document> docs = new ArrayList<>();
// Index 10 blocks; each block is two job docs followed by the resume doc
// (the parent is added last in every block).
// NOTE(review): presumably makeJob(skill, year), so only the "java" job
// carries year=2007 in each block — confirm against makeJob.
for (int i=0;i<10;i++) {
docs.clear();
docs.add(makeJob("ruby", i));
docs.add(makeJob("java", 2007));
docs.add(makeResume("Frank", "United States"));
w.addDocuments(docs);
}
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r, false);
// Hacky: this causes the query to need 2 rewrite
// iterations:
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(IntPoint.newExactQuery("year", 2007), BooleanClause.Occur.MUST);
// Anonymous query whose first rewrite yields the BooleanQuery built above;
// equality is identity-based so each instance is distinct.
Query qc = new Query() {
@Override
public Query rewrite(IndexReader reader) throws IOException {
return builder.build();
}
@Override
public String toString(String field) {
return "hack!";
}
@Override
public boolean equals(Object o) {
return o == this;
}
@Override
public int hashCode() {
return System.identityHashCode(this);
}
};
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
CheckJoinIndex.check(r, parentsFilter);
// Sanity check the premise: each of the two rewrite passes produces a new query object.
Query qw1 = qc.rewrite(r);
Query qw2 = qw1.rewrite(r);
assertNotSame(qc, qw1);
assertNotSame(qw1, qw2);
ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);
s.search(qp, c);
TopGroups<Integer> groups = c.getTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
// Every parent group must contain exactly one child hit.
for (GroupDocs<Integer> group : groups.groups) {
assertEquals(1, group.totalHits);
}
r.close();
dir.close();
}
/**
 * Builds a {@link TermQuery} on the {@code "skill"} field for the given value.
 *
 * @param skill the term text to look up in the {@code "skill"} field
 * @return the term query
 */
protected Query skill(String skill) {
  final Term skillTerm = new Term("skill", skill);
  return new TermQuery(skillTerm);
}
@ -612,6 +518,7 @@ public class TestBlockJoin extends LuceneTestCase {
final Directory dir = newDirectory();
final Directory joinDir = newDirectory();
final int maxNumChildrenPerParent = 20;
final int numParentDocs = TestUtil.nextInt(random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
//final int numParentDocs = 30;
@ -669,7 +576,7 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println(" " + sb.toString());
}
final int numChildDocs = TestUtil.nextInt(random(), 1, 20);
final int numChildDocs = TestUtil.nextInt(random(), 1, maxNumChildrenPerParent);
for(int childDocID=0;childDocID<numChildDocs;childDocID++) {
// Denormalize: copy all parent fields into child doc:
Document childDoc = TestUtil.cloneDocument(parentDoc);
@ -752,7 +659,7 @@ public class TestBlockJoin extends LuceneTestCase {
final IndexSearcher s = newSearcher(r, false);
final IndexSearcher joinS = new IndexSearcher(joinR);
final IndexSearcher joinS = newSearcher(joinR);
final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "x")));
CheckJoinIndex.check(joinS.getIndexReader(), parentsFilter);
@ -880,55 +787,35 @@ public class TestBlockJoin extends LuceneTestCase {
}
}
final boolean trackScores;
final boolean trackMaxScore;
if (agg == ScoreMode.None) {
trackScores = false;
trackMaxScore = false;
} else {
trackScores = random().nextBoolean();
trackMaxScore = random().nextBoolean();
TopDocs joinedResults = joinS.search(parentJoinQuery, numParentDocs);
SortedMap<Integer, TopDocs> joinResults = new TreeMap<>();
for (ScoreDoc parentHit : joinedResults.scoreDocs) {
ParentChildrenBlockJoinQuery childrenQuery =
new ParentChildrenBlockJoinQuery(parentsFilter, childQuery, parentHit.doc);
TopDocs childTopDocs = joinS.search(childrenQuery, maxNumChildrenPerParent, childSort);
final Document parentDoc = joinS.doc(parentHit.doc);
joinResults.put(Integer.valueOf(parentDoc.get("parentID")), childTopDocs);
}
final ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);
joinS.search(parentJoinQuery, c);
final int hitsPerGroup = TestUtil.nextInt(random(), 1, 20);
//final int hitsPerGroup = 100;
final TopGroups<Integer> joinResults = c.getTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);
if (VERBOSE) {
System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.groups.length) + " groups; hitsPerGroup=" + hitsPerGroup);
System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.size()) + " groups; hitsPerGroup=" + hitsPerGroup);
if (joinResults != null) {
final GroupDocs<Integer>[] groups = joinResults.groups;
for(int groupIDX=0;groupIDX<groups.length;groupIDX++) {
final GroupDocs<Integer> group = groups[groupIDX];
if (group.groupSortValues != null) {
System.out.print(" ");
for(Object o : group.groupSortValues) {
if (o instanceof BytesRef) {
System.out.print(((BytesRef) o).utf8ToString() + " ");
} else {
System.out.print(o + " ");
}
}
System.out.println();
}
assertNotNull(group.groupValue);
final Document parentDoc = joinS.doc(group.groupValue);
System.out.println(" group parentID=" + parentDoc.get("parentID") + " (docID=" + group.groupValue + ")");
for(int hitIDX=0;hitIDX<group.scoreDocs.length;hitIDX++) {
final Document doc = joinS.doc(group.scoreDocs[hitIDX].doc);
//System.out.println(" score=" + group.scoreDocs[hitIDX].score + " childID=" + doc.get("childID") + " (docID=" + group.scoreDocs[hitIDX].doc + ")");
System.out.println(" childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + group.scoreDocs[hitIDX].doc + ")");
for (Map.Entry<Integer, TopDocs> entry : joinResults.entrySet()) {
System.out.println(" group parentID=" + entry.getKey() + " (docID=" + entry.getKey() + ")");
for(ScoreDoc childHit : entry.getValue().scoreDocs) {
final Document doc = joinS.doc(childHit.doc);
// System.out.println(" score=" + childHit.score + " childID=" + doc.get("childID") + " (docID=" + childHit.doc + ")");
System.out.println(" childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + childHit.doc + ")");
}
}
}
}
if (results.totalHits == 0) {
assertNull(joinResults);
assertEquals(0, joinResults.size());
} else {
compareHits(r, joinR, results, joinResults);
TopDocs b = joinS.search(childJoinQuery, 10);
@ -1115,43 +1002,24 @@ public class TestBlockJoin extends LuceneTestCase {
}
}
private void compareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<Integer> joinResults) throws Exception {
// results is 'complete'; joinResults is a subset
int resultUpto = 0;
int joinGroupUpto = 0;
final ScoreDoc[] hits = results.scoreDocs;
final GroupDocs<Integer>[] groupDocs = joinResults.groups;
while(joinGroupUpto < groupDocs.length) {
final GroupDocs<Integer> group = groupDocs[joinGroupUpto++];
final ScoreDoc[] groupHits = group.scoreDocs;
assertNotNull(group.groupValue);
final Document parentDoc = joinR.document(group.groupValue);
final String parentID = parentDoc.get("parentID");
//System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc);
assertNotNull(parentID);
assertTrue(groupHits.length > 0);
for(int hitIDX=0;hitIDX<groupHits.length;hitIDX++) {
final Document nonJoinHit = r.document(hits[resultUpto++].doc);
final Document joinHit = joinR.document(groupHits[hitIDX].doc);
assertEquals(parentID,
nonJoinHit.get("parentID"));
assertEquals(joinHit.get("childID"),
nonJoinHit.get("childID"));
private void compareHits(IndexReader r, IndexReader joinR, TopDocs controlHits, Map<Integer, TopDocs> joinResults) throws Exception {
int currentParentID = -1;
int childHitSlot = 0;
TopDocs childHits = new TopDocs(0, new ScoreDoc[0], 0f);
for (ScoreDoc controlHit : controlHits.scoreDocs) {
Document controlDoc = r.document(controlHit.doc);
int parentID = Integer.valueOf(controlDoc.get("parentID"));
if (parentID != currentParentID) {
assertEquals(childHitSlot, childHits.scoreDocs.length);
currentParentID = parentID;
childHitSlot = 0;
childHits = joinResults.get(parentID);
}
if (joinGroupUpto < groupDocs.length) {
// Advance non-join hit to the next parentID:
//System.out.println(" next joingroupUpto=" + joinGroupUpto + " gd.length=" + groupDocs.length + " parentID=" + parentID);
while(true) {
assertTrue(resultUpto < hits.length);
if (!parentID.equals(r.document(hits[resultUpto].doc).get("parentID"))) {
break;
}
resultUpto++;
}
}
String controlChildID = controlDoc.get("childID");
Document childDoc = joinR.document(childHits.scoreDocs[childHitSlot++].doc);
String childID = childDoc.get("childID");
assertEquals(controlChildID, childID);
}
}
@ -1200,43 +1068,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(childJobJoinQuery, Occur.MUST));
fullQuery.add(new BooleanClause(childQualificationJoinQuery, Occur.MUST));
// Collects all job and qualification child docs for
// each resume hit in the top N (sorted by score):
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
s.search(fullQuery.build(), c);
// Examine "Job" children
TopGroups<Integer> jobResults = c.getTopGroups(childJobJoinQuery, null, 0, 10, 0, true);
//assertEquals(1, results.totalHitCount);
assertEquals(1, jobResults.totalGroupedHitCount);
assertEquals(1, jobResults.groups.length);
final GroupDocs<Integer> group = jobResults.groups[0];
assertEquals(1, group.totalHits);
Document childJobDoc = s.doc(group.scoreDocs[0].doc);
//System.out.println(" doc=" + group.scoreDocs[0].doc);
assertEquals("java", childJobDoc.get("skill"));
assertNotNull(group.groupValue);
Document parentDoc = s.doc(group.groupValue);
final TopDocs topDocs = s.search(fullQuery.build(), 10);
assertEquals(1, topDocs.totalHits);
Document parentDoc = s.doc(topDocs.scoreDocs[0].doc);
assertEquals("Lisa", parentDoc.get("name"));
// Now Examine qualification children
TopGroups<Integer> qualificationResults = c.getTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);
ParentChildrenBlockJoinQuery childrenQuery =
new ParentChildrenBlockJoinQuery(parentsFilter, childJobQuery.build(), topDocs.scoreDocs[0].doc);
TopDocs matchingChildren = s.search(childrenQuery, 1);
assertEquals(1, matchingChildren.totalHits);
assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
assertEquals(1, qualificationResults.totalGroupedHitCount);
assertEquals(1, qualificationResults.groups.length);
final GroupDocs<Integer> qGroup = qualificationResults.groups[0];
assertEquals(1, qGroup.totalHits);
Document childQualificationDoc = s.doc(qGroup.scoreDocs[0].doc);
assertEquals("maths", childQualificationDoc.get("qualification"));
assertNotNull(qGroup.groupValue);
parentDoc = s.doc(qGroup.groupValue);
assertEquals("Lisa", parentDoc.get("name"));
childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQualificationQuery.build(), topDocs.scoreDocs[0].doc);
matchingChildren = s.search(childrenQuery, 1);
assertEquals(1, matchingChildren.totalHits);
assertEquals("maths", s.doc(matchingChildren.scoreDocs[0].doc).get("qualification"));
r.close();
dir.close();
@ -1300,165 +1146,6 @@ public class TestBlockJoin extends LuceneTestCase {
dir.close();
}
// Exercises ToParentBlockJoinCollector.getTopGroups and
// getTopGroupsWithAllChildDocs on a single resume block with four job
// children, two of which satisfy the child query (skill "java", year
// range query 2006..2011).
public void testGetTopGroups() throws Exception {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
final List<Document> docs = new ArrayList<>();
docs.add(makeJob("ruby", 2005));
docs.add(makeJob("java", 2006));
docs.add(makeJob("java", 2010));
docs.add(makeJob("java", 2012));
// Randomize child order; the parent is appended afterwards so it stays last in the block.
Collections.shuffle(docs, random());
docs.add(makeResume("Frank", "United States"));
// NOTE(review): addSkillless is defined elsewhere; presumably it adds a
// block without skill children before and after the block under test — confirm.
addSkillless(w);
w.addDocuments(docs);
addSkillless(w);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = new IndexSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
CheckJoinIndex.check(s.getIndexReader(), parentsFilter);
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery.Builder childQuery = new BooleanQuery.Builder();
childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
childQuery.add(new BooleanClause(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST));
// Wrap the child document query to 'join' any matches
// up to corresponding parent:
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery.build(), parentsFilter, ScoreMode.Avg);
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
s.search(childJoinQuery, c);
//Get all child documents within groups
@SuppressWarnings({"unchecked","rawtypes"})
TopGroups<Integer>[] getTopGroupsResults = new TopGroups[2];
getTopGroupsResults[0] = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
getTopGroupsResults[1] = c.getTopGroupsWithAllChildDocs(childJoinQuery, null, 0, 0, true);
// Both retrieval methods must report the same single group with both matching children.
for (TopGroups<Integer> results : getTopGroupsResults) {
assertFalse(Float.isNaN(results.maxScore));
assertEquals(2, results.totalGroupedHitCount);
assertEquals(1, results.groups.length);
final GroupDocs<Integer> group = results.groups[0];
assertEquals(2, group.totalHits);
assertFalse(Float.isNaN(group.score));
assertNotNull(group.groupValue);
Document parentDoc = s.doc(group.groupValue);
assertEquals("Frank", parentDoc.get("name"));
assertEquals(2, group.scoreDocs.length); //all matched child documents collected
for (ScoreDoc scoreDoc : group.scoreDocs) {
Document childDoc = s.doc(scoreDoc.doc);
assertEquals("java", childDoc.get("skill"));
int year = Integer.parseInt(childDoc.get("year"));
assertTrue(year >= 2006 && year <= 2011);
}
}
//Get part of child documents
// Same call as above but with 1 instead of 10 as the fourth argument: hit
// totals stay at 2 while only one child doc is returned in the group.
TopGroups<Integer> boundedResults = c.getTopGroups(childJoinQuery, null, 0, 1, 0, true);
assertFalse(Float.isNaN(boundedResults.maxScore));
assertEquals(2, boundedResults.totalGroupedHitCount);
assertEquals(1, boundedResults.groups.length);
final GroupDocs<Integer> group = boundedResults.groups[0];
assertEquals(2, group.totalHits);
assertFalse(Float.isNaN(group.score));
assertNotNull(group.groupValue);
Document parentDoc = s.doc(group.groupValue);
assertEquals("Frank", parentDoc.get("name"));
assertEquals(1, group.scoreDocs.length); //not all matched child documents collected
for (ScoreDoc scoreDoc : group.scoreDocs) {
Document childDoc = s.doc(scoreDoc.doc);
assertEquals("java", childDoc.get("skill"));
int year = Integer.parseInt(childDoc.get("year"));
assertTrue(year >= 2006 && year <= 2011);
}
r.close();
dir.close();
}
// LUCENE-4968
// Two parent blocks are indexed: parent "0" has one child, parent "1" has
// none. The top-level query ORs the block-join clause with a parent-only
// term clause, so parent "1" matches without any child hit; the collector
// must still return both parents as groups.
public void testSometimesParentOnlyMatches() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
Document parent = new Document();
parent.add(new StoredField("parentID", "0"));
parent.add(new SortedDocValuesField("parentID", new BytesRef("0")));
parent.add(newTextField("parentText", "text", Field.Store.NO));
parent.add(newStringField("isParent", "yes", Field.Store.NO));
List<Document> docs = new ArrayList<>();
Document child = new Document();
docs.add(child);
child.add(new StoredField("childID", "0"));
child.add(newTextField("childText", "text", Field.Store.NO));
// parent last:
docs.add(parent);
w.addDocuments(docs);
docs.clear();
// Second block: a parent with no children.
parent = new Document();
parent.add(newTextField("parentText", "text", Field.Store.NO));
parent.add(newStringField("isParent", "yes", Field.Store.NO));
parent.add(new StoredField("parentID", "1"));
parent.add(new SortedDocValuesField("parentID", new BytesRef("1")));
// parent last:
docs.add(parent);
w.addDocuments(docs);
IndexReader r = w.getReader();
w.close();
IndexSearcher searcher = new ToParentBlockJoinIndexSearcher(r);
Query childQuery = new TermQuery(new Term("childText", "text"));
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
CheckJoinIndex.check(r, parentsFilter);
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
// Either clause may match: the join clause (via a child) or the parent's own text.
BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
parentQuery.add(childJoinQuery, Occur.SHOULD);
parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
// Groups are sorted by the parentID doc-values field, so "0" precedes "1".
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
10, true, true);
searcher.search(parentQuery.build(), c);
TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
// Two parents:
assertEquals(2, groups.totalGroupCount.intValue());
// One child doc:
assertEquals(1, groups.totalGroupedHitCount);
GroupDocs<Integer> group = groups.groups[0];
Document doc = r.document(group.groupValue.intValue());
assertEquals("0", doc.get("parentID"));
group = groups.groups[1];
doc = r.document(group.groupValue.intValue());
assertEquals("1", doc.get("parentID"));
r.close();
d.close();
}
// LUCENE-4968
public void testChildQueryNeverMatches() throws Exception {
Directory d = newDirectory();
@ -1496,90 +1183,25 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r = w.getReader();
w.close();
IndexSearcher searcher = new ToParentBlockJoinIndexSearcher(r);
IndexSearcher searcher = newSearcher(r);
// never matches:
Query childQuery = new TermQuery(new Term("childText", "bogus"));
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
CheckJoinIndex.check(r, parentsFilter);
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
parentQuery.add(childJoinQuery, Occur.SHOULD);
parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
10, true, true);
searcher.search(parentQuery.build(), c);
TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
Weight weight = searcher.createNormalizedWeight(childJoinQuery, random().nextBoolean());
Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
assertNull(scorer);
// Two parents:
assertEquals(2, groups.totalGroupCount.intValue());
// No child docs:
assertEquals(0, groups.totalGroupedHitCount);
GroupDocs<Integer> group = groups.groups[0];
Document doc = r.document(group.groupValue.intValue());
assertEquals("0", doc.get("parentID"));
group = groups.groups[1];
doc = r.document(group.groupValue.intValue());
assertEquals("1", doc.get("parentID"));
r.close();
d.close();
}
// LUCENE-4968
public void testChildQueryMatchesParent() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
Document parent = new Document();
parent.add(new StoredField("parentID", "0"));
parent.add(newTextField("parentText", "text", Field.Store.NO));
parent.add(newStringField("isParent", "yes", Field.Store.NO));
List<Document> docs = new ArrayList<>();
Document child = new Document();
docs.add(child);
child.add(new StoredField("childID", "0"));
child.add(newTextField("childText", "text", Field.Store.NO));
// parent last:
docs.add(parent);
w.addDocuments(docs);
docs.clear();
parent = new Document();
parent.add(newTextField("parentText", "text", Field.Store.NO));
parent.add(newStringField("isParent", "yes", Field.Store.NO));
parent.add(new StoredField("parentID", "1"));
// parent last:
docs.add(parent);
w.addDocuments(docs);
IndexReader r = w.getReader();
w.close();
// illegally matches parent:
Query childQuery = new TermQuery(new Term("parentText", "text"));
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
CheckJoinIndex.check(r, parentsFilter);
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
parentQuery.add(childJoinQuery, Occur.SHOULD);
parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
10, true, true);
expectThrows(IllegalStateException.class, () -> {
newSearcher(r).search(parentQuery.build(), c);
});
// never matches and produces a null scorer
childQuery = new TermQuery(new Term("bogus", "bogus"));
childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
weight = searcher.createNormalizedWeight(childJoinQuery, random().nextBoolean());
scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
assertNull(scorer);
r.close();
d.close();

View File

@ -454,59 +454,7 @@ public class TestJoinUtil extends LuceneTestCase {
public void testMinMaxScore() throws Exception {
String priceField = "price";
// FunctionQuery would be helpful, but join module doesn't depend on queries module.
Query priceQuery = new Query() {
private final Query fieldQuery = new FieldValueQuery(priceField);
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
Weight fieldWeight = fieldQuery.createWeight(searcher, false, boost);
return new Weight(this) {
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
Scorer fieldScorer = fieldWeight.scorer(context);
if (fieldScorer == null) {
return null;
}
NumericDocValues price = context.reader().getNumericDocValues(priceField);
return new FilterScorer(fieldScorer, this) {
@Override
public float score() throws IOException {
assertEquals(in.docID(), price.nextDoc());
return (float) price.longValue();
}
};
}
};
}
@Override
public String toString(String field) {
return fieldQuery.toString(field);
}
@Override
public boolean equals(Object o) {
return o == this;
}
@Override
public int hashCode() {
return System.identityHashCode(this);
}
};
Query priceQuery = numericDocValuesScoreQuery(priceField);
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(
@ -579,6 +527,62 @@ public class TestJoinUtil extends LuceneTestCase {
dir.close();
}
/**
 * Builds a query that matches the documents matched by a {@link FieldValueQuery} on
 * {@code field} and scores each hit with the numeric doc value the document holds in
 * that field.
 *
 * <p>FunctionQuery would be helpful, but join module doesn't depend on queries module.
 *
 * @param field name of the numeric doc values field that supplies both the match and the score
 * @return a query with identity-based {@code equals}/{@code hashCode}; its weight returns
 *         {@code null} from {@code explain} and extracts no terms
 */
static Query numericDocValuesScoreQuery(final String field) {
  return new Query() {

    private final Query fieldQuery = new FieldValueQuery(field);

    @Override
    public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
      // The wrapped query only drives matching, so it is always created with needsScores=false.
      Weight fieldWeight = fieldQuery.createWeight(searcher, false, boost);
      return new Weight(this) {

        @Override
        public void extractTerms(Set<Term> terms) {
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
          return null;
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
          Scorer fieldScorer = fieldWeight.scorer(context);
          if (fieldScorer == null) {
            return null;
          }
          NumericDocValues price = context.reader().getNumericDocValues(field);
          return new FilterScorer(fieldScorer, this) {
            @Override
            public float score() throws IOException {
              final int doc = in.docID();
              // score() may be called more than once for the same document. Advancing an
              // iterator to a target that is not beyond its current position is undefined,
              // so only move the doc values forward when they are behind the scorer.
              if (price.docID() < doc) {
                price.advance(doc);
              }
              // Every matched document is expected to carry a value for the field.
              assertEquals(doc, price.docID());
              return (float) price.longValue();
            }
          };
        }
      };
    }

    @Override
    public String toString(String field) {
      return fieldQuery.toString(field);
    }

    @Override
    public boolean equals(Object o) {
      return o == this;
    }

    @Override
    public int hashCode() {
      return System.identityHashCode(this);
    }
  };
}
public void testMinMaxDocs() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(

View File

@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.join;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/**
 * Tests that {@link ParentChildrenBlockJoinQuery} returns the matching child documents of a
 * single parent document.
 */
public class TestParentChildrenBlockJoinQuery extends LuceneTestCase {

  /**
   * Indexes a random number of blocks (children first, parent last) and then, for every parent
   * hit, runs a {@link ParentChildrenBlockJoinQuery} and checks the number and the scores of the
   * returned children.
   *
   * <p>Each child stores {@code j + 1} in the {@code score} doc values field and each parent
   * stores its number of children in {@code num_child_docs}, so the expected hit count and the
   * expected descending scores can be derived from the parent alone.
   */
  public void testParentChildrenBlockJoinQuery() throws Exception {
    int numParentDocs = 8 + random().nextInt(8);
    int maxChildDocsPerParent = 8 + random().nextInt(8);

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numParentDocs; i++) {
      // Always strictly less than maxChildDocsPerParent, so a search asking for
      // maxChildDocsPerParent hits returns every child of a block.
      int numChildDocs = random().nextInt(maxChildDocsPerParent);
      List<Document> docs = new ArrayList<>(numChildDocs + 1);
      for (int j = 0; j < numChildDocs; j++) {
        Document childDoc = new Document();
        childDoc.add(new StringField("type", "child", Field.Store.NO));
        childDoc.add(new NumericDocValuesField("score", j + 1));
        docs.add(childDoc);
      }

      // The parent closes the block and records how many children precede it.
      Document parentDoc = new Document();
      parentDoc.add(new StringField("type", "parent", Field.Store.NO));
      parentDoc.add(new NumericDocValuesField("num_child_docs", numChildDocs));
      docs.add(parentDoc);
      writer.addDocuments(docs);
    }

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
    // The FILTER clause restricts hits to children; the SHOULD clause is the only one that
    // contributes to the score, which therefore equals the child's "score" doc value.
    Query childQuery = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("type", "child")), BooleanClause.Occur.FILTER)
        .add(TestJoinUtil.numericDocValuesScoreQuery("score"), BooleanClause.Occur.SHOULD)
        .build();

    TopDocs parentDocs = searcher.search(new TermQuery(new Term("type", "parent")), numParentDocs);
    // Expected value first, so a failure message reports the numbers the right way round.
    assertEquals(numParentDocs, parentDocs.scoreDocs.length);

    List<LeafReaderContext> leaves = reader.leaves();
    for (ScoreDoc parentScoreDoc : parentDocs.scoreDocs) {
      LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(parentScoreDoc.doc, leaves));
      NumericDocValues numChildDocsValues = leaf.reader().getNumericDocValues("num_child_docs");
      int segmentDoc = parentScoreDoc.doc - leaf.docBase;
      // Make sure the value read below really belongs to this parent document.
      assertEquals(segmentDoc, numChildDocsValues.advance(segmentDoc));
      long expectedChildDocs = numChildDocsValues.longValue();

      ParentChildrenBlockJoinQuery parentChildrenBlockJoinQuery =
          new ParentChildrenBlockJoinQuery(parentFilter, childQuery, parentScoreDoc.doc);
      TopDocs topDocs = searcher.search(parentChildrenBlockJoinQuery, maxChildDocsPerParent);
      assertEquals(expectedChildDocs, topDocs.totalHits);
      if (expectedChildDocs > 0) {
        // Children score 1..n, so the best hit scores n and each following hit one less.
        assertEquals(expectedChildDocs, topDocs.getMaxScore(), 0);
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
          ScoreDoc childScoreDoc = topDocs.scoreDocs[i];
          assertEquals(expectedChildDocs - i, childScoreDoc.score, 0);
        }
      }
    }

    reader.close();
    dir.close();
  }
}

View File

@ -19,11 +19,9 @@ package org.apache.solr.search.join;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.join.ToParentBlockJoinQuery.ChildrenMatchesScorer;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.AggregatableDocIter;
import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.SortedIntsAggDocIterator;
/**
* For each collected parent document creates matched block, which is a docSet with matched children and parent doc
@ -32,8 +30,6 @@ import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.SortedIntsAggD
class BlockJoinFacetAccsHolder {
private BlockJoinFieldFacetAccumulator[] blockJoinFieldFacetAccumulators;
private boolean firstSegment = true;
private ChildrenMatchesScorer blockJoinScorer;
private int[] childDocs = new int[0];
BlockJoinFacetAccsHolder(SolrQueryRequest req) throws IOException {
String[] facetFieldNames = BlockJoinFacetComponentSupport.getChildFacetFields(req);
@ -61,16 +57,6 @@ class BlockJoinFacetAccsHolder {
}
}
/**
 * Counts facets over the child documents currently held by the block join scorer for the
 * given parent.
 *
 * @param parent parent document handed to the aggregating doc iterator
 * @throws IOException declared by the signature; presumably propagated from facet counting
 */
protected void incrementFacets(int parent) throws IOException {
  // The parent is no longer appended to the child doc array (the former
  // includeParentDoc(parent) step is gone), so the count below excludes the parent.
  final int[] matchedChildren = blockJoinScorer.swapChildDocs(childDocs);
  final int matchedChildCount = blockJoinScorer.getChildCount();
  countFacets(new SortedIntsAggDocIterator(matchedChildren, matchedChildCount, parent));
}
/** is not used
protected int[] includeParentDoc(int parent) {
final int[] docNums = ArrayUtil.grow(childTracking.getChildDocs(), childTracking.getChildCount()+1);