mirror of https://github.com/apache/lucene.git
LUCENE-4968: fix several block join issues
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1477570 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
20f2f39170
commit
e5b6c85074
|
@ -68,6 +68,11 @@ Bug Fixes
|
|||
* LUCENE-4959: Fix incorrect return value in
|
||||
SimpleNaiveBayesClassifier.assignClass. (Alexey Kutin via Adrien Grand)
|
||||
|
||||
* LUCENE-4968: Fixed ToParentBlockJoinQuery/Collector: correctly handle parent
|
||||
hits that had no child matches, don't throw IllegalArgumentEx when
|
||||
the child query has no hits, more aggressively catch cases where childQuery
|
||||
incorrectly matches parent documents (Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher
|
||||
|
|
|
@ -111,6 +111,7 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
if (trackMaxScore) {
|
||||
maxScore = Float.MIN_VALUE;
|
||||
}
|
||||
//System.out.println("numParentHits=" + numParentHits);
|
||||
this.trackScores = trackScores;
|
||||
this.numParentHits = numParentHits;
|
||||
queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
|
||||
|
@ -122,6 +123,7 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
private static final class OneGroup extends FieldValueHitQueue.Entry {
|
||||
public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
|
||||
super(comparatorSlot, parentDoc, parentScore);
|
||||
//System.out.println("make OneGroup parentDoc=" + parentDoc);
|
||||
docs = new int[numJoins][];
|
||||
for(int joinID=0;joinID<numJoins;joinID++) {
|
||||
docs[joinID] = new int[5];
|
||||
|
@ -142,7 +144,7 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
|
||||
@Override
|
||||
public void collect(int parentDoc) throws IOException {
|
||||
//System.out.println("C parentDoc=" + parentDoc);
|
||||
//System.out.println("\nC parentDoc=" + parentDoc);
|
||||
totalHitCount++;
|
||||
|
||||
float score = Float.NaN;
|
||||
|
@ -203,8 +205,7 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
for (int i = 0; i < comparators.length; i++) {
|
||||
comparators[i].copy(comparatorSlot, parentDoc);
|
||||
}
|
||||
//System.out.println(" startup: new OG doc=" +
|
||||
//(docBase+parentDoc));
|
||||
//System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
|
||||
if (!trackMaxScore && trackScores) {
|
||||
score = scorer.score();
|
||||
}
|
||||
|
@ -241,22 +242,28 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
og.scores = ArrayUtil.grow(og.scores);
|
||||
}
|
||||
|
||||
//System.out.println("copyGroups parentDoc=" + og.doc);
|
||||
//System.out.println("\ncopyGroups parentDoc=" + og.doc);
|
||||
for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
|
||||
final ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
|
||||
//System.out.println(" scorer=" + joinScorer);
|
||||
if (joinScorer != null) {
|
||||
if (joinScorer != null && docBase + joinScorer.getParentDoc() == og.doc) {
|
||||
og.counts[scorerIDX] = joinScorer.getChildCount();
|
||||
//System.out.println(" count=" + og.counts[scorerIDX]);
|
||||
og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
|
||||
assert og.docs[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.docs[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
|
||||
//System.out.println(" len=" + og.docs[scorerIDX].length);
|
||||
/*
|
||||
for(int idx=0;idx<og.counts[scorerIDX];idx++) {
|
||||
for(int idx=0;idx<og.counts[scorerIDX];idx++) {
|
||||
System.out.println(" docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
if (trackScores) {
|
||||
//System.out.println(" copy scores");
|
||||
og.scores[scorerIDX] = joinScorer.swapChildScores(og.scores[scorerIDX]);
|
||||
assert og.scores[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
|
||||
}
|
||||
} else {
|
||||
og.counts[scorerIDX] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -302,13 +309,16 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
Arrays.fill(joinScorers, null);
|
||||
|
||||
Queue<Scorer> queue = new LinkedList<Scorer>();
|
||||
//System.out.println("\nqueue: add top scorer=" + scorer);
|
||||
queue.add(scorer);
|
||||
while ((scorer = queue.poll()) != null) {
|
||||
//System.out.println(" poll: " + scorer + "; " + scorer.getWeight().getQuery());
|
||||
if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
|
||||
enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
|
||||
}
|
||||
|
||||
for (ChildScorer sub : scorer.getChildren()) {
|
||||
//System.out.println(" add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
|
||||
queue.add(sub.child);
|
||||
}
|
||||
}
|
||||
|
@ -384,12 +394,8 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
throws IOException {
|
||||
|
||||
final Integer _slot = joinQueryID.get(query);
|
||||
if (_slot == null) {
|
||||
if (totalHitCount == 0) {
|
||||
return null;
|
||||
} else {
|
||||
throw new IllegalArgumentException("the Query did not contain the provided BlockJoinQuery");
|
||||
}
|
||||
if (_slot == null && totalHitCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (sortedGroups == null) {
|
||||
|
@ -401,7 +407,7 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
return null;
|
||||
}
|
||||
|
||||
return accumulateGroups(_slot, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
|
||||
return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -423,18 +429,26 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
final FakeScorer fakeScorer = new FakeScorer();
|
||||
|
||||
int totalGroupedHitCount = 0;
|
||||
//System.out.println("slot=" + slot);
|
||||
|
||||
for(int groupIDX=offset;groupIDX<sortedGroups.length;groupIDX++) {
|
||||
final OneGroup og = sortedGroups[groupIDX];
|
||||
final int numChildDocs = og.counts[slot];
|
||||
final int numChildDocs;
|
||||
if (slot == -1 || slot >= og.counts.length) {
|
||||
numChildDocs = 0;
|
||||
} else {
|
||||
numChildDocs = og.counts[slot];
|
||||
}
|
||||
|
||||
// Number of documents in group should be bounded to prevent redundant memory allocation
|
||||
final int numDocsInGroup = Math.min(numChildDocs, maxDocsPerGroup);
|
||||
final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
|
||||
//System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
|
||||
|
||||
// At this point we hold all docs w/ in each group,
|
||||
// unsorted; we now sort them:
|
||||
final TopDocsCollector<?> collector;
|
||||
if (withinGroupSort == null) {
|
||||
//System.out.println("sort by score");
|
||||
// Sort by score
|
||||
if (!trackScores) {
|
||||
throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
|
||||
|
@ -448,6 +462,7 @@ public class ToParentBlockJoinCollector extends Collector {
|
|||
collector.setScorer(fakeScorer);
|
||||
collector.setNextReader(og.readerContext);
|
||||
for(int docIDX=0;docIDX<numChildDocs;docIDX++) {
|
||||
//System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
|
||||
final int doc = og.docs[slot][docIDX];
|
||||
fakeScorer.doc = doc;
|
||||
if (trackScores) {
|
||||
|
|
|
@ -195,10 +195,8 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
@Override
|
||||
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
|
||||
BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs());
|
||||
if (scorer != null) {
|
||||
if (scorer.advance(doc) == doc) {
|
||||
return scorer.explain(context.docBase);
|
||||
}
|
||||
if (scorer != null && scorer.advance(doc) == doc) {
|
||||
return scorer.explain(context.docBase);
|
||||
}
|
||||
return new ComplexExplanation(false, 0.0f, "Not a match");
|
||||
}
|
||||
|
@ -246,6 +244,10 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
return childDocUpto;
|
||||
}
|
||||
|
||||
int getParentDoc() {
|
||||
return parentDoc;
|
||||
}
|
||||
|
||||
int[] swapChildDocs(int[] other) {
|
||||
final int[] ret = pendingChildDocs;
|
||||
if (other == null) {
|
||||
|
@ -272,7 +274,6 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
//System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
|
||||
|
||||
// Loop until we hit a parentDoc that's accepted
|
||||
while (true) {
|
||||
if (nextChildDoc == NO_MORE_DOCS) {
|
||||
|
@ -285,6 +286,12 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
|
||||
parentDoc = parentBits.nextSetBit(nextChildDoc);
|
||||
|
||||
// Parent & child docs are supposed to be
|
||||
// orthogonal:
|
||||
if (nextChildDoc == parentDoc) {
|
||||
throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
|
||||
}
|
||||
|
||||
//System.out.println(" parentDoc=" + parentDoc);
|
||||
assert parentDoc != -1;
|
||||
|
||||
|
@ -295,6 +302,13 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
do {
|
||||
nextChildDoc = childScorer.nextDoc();
|
||||
} while (nextChildDoc < parentDoc);
|
||||
|
||||
// Parent & child docs are supposed to be
|
||||
// orthogonal:
|
||||
if (nextChildDoc == parentDoc) {
|
||||
throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -326,8 +340,11 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
nextChildDoc = childScorer.nextDoc();
|
||||
} while (nextChildDoc < parentDoc);
|
||||
|
||||
// Parent & child docs are supposed to be orthogonal:
|
||||
assert nextChildDoc != parentDoc;
|
||||
// Parent & child docs are supposed to be
|
||||
// orthogonal:
|
||||
if (nextChildDoc == parentDoc) {
|
||||
throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
|
||||
}
|
||||
|
||||
switch(scoreMode) {
|
||||
case Avg:
|
||||
|
@ -343,7 +360,7 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
break;
|
||||
}
|
||||
|
||||
//System.out.println(" return parentDoc=" + parentDoc);
|
||||
//System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
|
||||
return parentDoc;
|
||||
}
|
||||
}
|
||||
|
@ -393,7 +410,9 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
}
|
||||
|
||||
// Parent & child docs are supposed to be orthogonal:
|
||||
assert nextChildDoc != prevParentDoc;
|
||||
if (nextChildDoc == prevParentDoc) {
|
||||
throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
|
||||
}
|
||||
|
||||
final int nd = nextDoc();
|
||||
//System.out.println(" return nextParentDoc=" + nd);
|
||||
|
|
|
@ -1206,4 +1206,191 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// LUCENE-4968
|
||||
public void testSometimesParentOnlyMatches() throws Exception {
|
||||
Directory d = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), d);
|
||||
Document parent = new Document();
|
||||
parent.add(new StoredField("parentID", "0"));
|
||||
parent.add(newTextField("parentText", "text", Field.Store.NO));
|
||||
parent.add(newStringField("isParent", "yes", Field.Store.NO));
|
||||
|
||||
List<Document> docs = new ArrayList<Document>();
|
||||
|
||||
Document child = new Document();
|
||||
docs.add(child);
|
||||
child.add(new StoredField("childID", "0"));
|
||||
child.add(newTextField("childText", "text", Field.Store.NO));
|
||||
|
||||
// parent last:
|
||||
docs.add(parent);
|
||||
w.addDocuments(docs);
|
||||
|
||||
docs.clear();
|
||||
|
||||
parent = new Document();
|
||||
parent.add(newTextField("parentText", "text", Field.Store.NO));
|
||||
parent.add(newStringField("isParent", "yes", Field.Store.NO));
|
||||
parent.add(new StoredField("parentID", "1"));
|
||||
|
||||
// parent last:
|
||||
docs.add(parent);
|
||||
w.addDocuments(docs);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
Query childQuery = new TermQuery(new Term("childText", "text"));
|
||||
Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
|
||||
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
|
||||
BooleanQuery parentQuery = new BooleanQuery();
|
||||
parentQuery.add(childJoinQuery, Occur.SHOULD);
|
||||
parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
|
||||
|
||||
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
|
||||
10, true, true);
|
||||
newSearcher(r).search(parentQuery, c);
|
||||
TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
|
||||
|
||||
// Two parents:
|
||||
assertEquals(2, groups.totalGroupCount.intValue());
|
||||
|
||||
// One child docs:
|
||||
assertEquals(1, groups.totalGroupedHitCount);
|
||||
|
||||
GroupDocs<Integer> group = groups.groups[0];
|
||||
StoredDocument doc = r.document(group.groupValue.intValue());
|
||||
assertEquals("0", doc.get("parentID"));
|
||||
System.out.println("group: " + group);
|
||||
|
||||
group = groups.groups[1];
|
||||
doc = r.document(group.groupValue.intValue());
|
||||
assertEquals("1", doc.get("parentID"));
|
||||
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
// LUCENE-4968
|
||||
public void testChildQueryNeverMatches() throws Exception {
|
||||
Directory d = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), d);
|
||||
Document parent = new Document();
|
||||
parent.add(new StoredField("parentID", "0"));
|
||||
parent.add(newTextField("parentText", "text", Field.Store.NO));
|
||||
parent.add(newStringField("isParent", "yes", Field.Store.NO));
|
||||
|
||||
List<Document> docs = new ArrayList<Document>();
|
||||
|
||||
Document child = new Document();
|
||||
docs.add(child);
|
||||
child.add(new StoredField("childID", "0"));
|
||||
child.add(newTextField("childText", "text", Field.Store.NO));
|
||||
|
||||
// parent last:
|
||||
docs.add(parent);
|
||||
w.addDocuments(docs);
|
||||
|
||||
docs.clear();
|
||||
|
||||
parent = new Document();
|
||||
parent.add(newTextField("parentText", "text", Field.Store.NO));
|
||||
parent.add(newStringField("isParent", "yes", Field.Store.NO));
|
||||
parent.add(new StoredField("parentID", "1"));
|
||||
|
||||
// parent last:
|
||||
docs.add(parent);
|
||||
w.addDocuments(docs);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
// never matches:
|
||||
Query childQuery = new TermQuery(new Term("childText", "bogus"));
|
||||
Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
|
||||
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
|
||||
BooleanQuery parentQuery = new BooleanQuery();
|
||||
parentQuery.add(childJoinQuery, Occur.SHOULD);
|
||||
parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
|
||||
|
||||
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
|
||||
10, true, true);
|
||||
newSearcher(r).search(parentQuery, c);
|
||||
TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
|
||||
|
||||
// Two parents:
|
||||
assertEquals(2, groups.totalGroupCount.intValue());
|
||||
|
||||
// One child docs:
|
||||
assertEquals(0, groups.totalGroupedHitCount);
|
||||
|
||||
GroupDocs<Integer> group = groups.groups[0];
|
||||
StoredDocument doc = r.document(group.groupValue.intValue());
|
||||
assertEquals("0", doc.get("parentID"));
|
||||
System.out.println("group: " + group);
|
||||
|
||||
group = groups.groups[1];
|
||||
doc = r.document(group.groupValue.intValue());
|
||||
assertEquals("1", doc.get("parentID"));
|
||||
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
// LUCENE-4968
|
||||
public void testChildQueryMatchesParent() throws Exception {
|
||||
Directory d = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), d);
|
||||
Document parent = new Document();
|
||||
parent.add(new StoredField("parentID", "0"));
|
||||
parent.add(newTextField("parentText", "text", Field.Store.NO));
|
||||
parent.add(newStringField("isParent", "yes", Field.Store.NO));
|
||||
|
||||
List<Document> docs = new ArrayList<Document>();
|
||||
|
||||
Document child = new Document();
|
||||
docs.add(child);
|
||||
child.add(new StoredField("childID", "0"));
|
||||
child.add(newTextField("childText", "text", Field.Store.NO));
|
||||
|
||||
// parent last:
|
||||
docs.add(parent);
|
||||
w.addDocuments(docs);
|
||||
|
||||
docs.clear();
|
||||
|
||||
parent = new Document();
|
||||
parent.add(newTextField("parentText", "text", Field.Store.NO));
|
||||
parent.add(newStringField("isParent", "yes", Field.Store.NO));
|
||||
parent.add(new StoredField("parentID", "1"));
|
||||
|
||||
// parent last:
|
||||
docs.add(parent);
|
||||
w.addDocuments(docs);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
// illegally matches parent:
|
||||
Query childQuery = new TermQuery(new Term("parentText", "text"));
|
||||
Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
|
||||
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
|
||||
BooleanQuery parentQuery = new BooleanQuery();
|
||||
parentQuery.add(childJoinQuery, Occur.SHOULD);
|
||||
parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
|
||||
|
||||
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
|
||||
10, true, true);
|
||||
|
||||
try {
|
||||
newSearcher(r).search(parentQuery, c);
|
||||
fail("should have hit exception");
|
||||
} catch (IllegalStateException ise) {
|
||||
// expected
|
||||
}
|
||||
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue