SOLR-3076: fix BJQ to handle incoming liveDocs/filter correctly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1242934 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-02-10 21:28:52 +00:00
parent 70a7d4975f
commit bea8fd0fb6
4 changed files with 238 additions and 101 deletions

View File

@ -698,7 +698,10 @@ Bug fixes
* LUCENE-3589: BytesRef copy(short) didnt set length.
(Peter Chang via Robert Muir)
* SOLR-3076: ToParent/ChildBlockJoinQuery was not handling an incoming
filter nor deleted docs correctly (Mikhail Khludnev via Mike
McCandless).
======================= Lucene 3.6.0 =======================

View File

@ -109,20 +109,27 @@ public class ToChildBlockJoinQuery extends Query {
parentWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
}
// NOTE: acceptDocs applies (and is checked) only in the
// child document space
@Override
public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
// Pass scoreDocsInOrder true, topScorer false to our sub:
final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, acceptDocs);
final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, null);
if (parentScorer == null) {
// No matches
return null;
}
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader().getLiveDocs());
// TODO: once we do random-access filters we can
// generalize this:
// NOTE: we cannot pass acceptDocs here because this
// will (most likely, justifiably) cause the filter to
// not return a FixedBitSet but rather a
// BitsFilteredDocIdSet. Instead, we filter by
// acceptDocs when we score:
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
if (parents == null) {
// No matches
return null;
@ -131,7 +138,7 @@ public class ToChildBlockJoinQuery extends Query {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores);
return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores, acceptDocs);
}
@Override
@ -151,16 +158,19 @@ public class ToChildBlockJoinQuery extends Query {
private final Scorer parentScorer;
private final FixedBitSet parentBits;
private final boolean doScores;
private final Bits acceptDocs;
private float parentScore;
private int childDoc = -1;
private int parentDoc;
public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores) {
public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores, Bits acceptDocs) {
super(weight);
this.doScores = doScores;
this.parentBits = parentBits;
this.parentScorer = parentScorer;
this.acceptDocs = acceptDocs;
}
@Override
@ -172,45 +182,56 @@ public class ToChildBlockJoinQuery extends Query {
public int nextDoc() throws IOException {
//System.out.println("Q.nextDoc() parentDoc=" + parentDoc + " childDoc=" + childDoc);
if (childDoc+1 == parentDoc) {
// OK, we are done iterating through all children
// matching this one parent doc, so we now nextDoc()
// the parent. Use a while loop because we may have
// to skip over some number of parents w/ no
// children:
while (true) {
parentDoc = parentScorer.nextDoc();
if (parentDoc == 0) {
// Degenerate but allowed: parent has no children
// TODO: would be nice to pull initial parent
// into ctor so we can skip this if... but it's
// tricky because scorer must return -1 for
// .doc() on init...
// Loop until we hit a childDoc that's accepted
while (true) {
if (childDoc+1 == parentDoc) {
// OK, we are done iterating through all children
// matching this one parent doc, so we now nextDoc()
// the parent. Use a while loop because we may have
// to skip over some number of parents w/ no
// children:
while (true) {
parentDoc = parentScorer.nextDoc();
}
if (parentDoc == NO_MORE_DOCS) {
childDoc = NO_MORE_DOCS;
//System.out.println(" END");
return childDoc;
}
childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
if (childDoc < parentDoc) {
if (doScores) {
parentScore = parentScorer.score();
if (parentDoc == 0) {
// Degenerate but allowed: parent has no children
// TODO: would be nice to pull initial parent
// into ctor so we can skip this if... but it's
// tricky because scorer must return -1 for
// .doc() on init...
parentDoc = parentScorer.nextDoc();
}
if (parentDoc == NO_MORE_DOCS) {
childDoc = NO_MORE_DOCS;
//System.out.println(" END");
return childDoc;
}
childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
if (acceptDocs != null && !acceptDocs.get(childDoc)) {
continue;
}
if (childDoc < parentDoc) {
if (doScores) {
parentScore = parentScorer.score();
}
//System.out.println(" " + childDoc);
return childDoc;
} else {
// Degenerate but allowed: parent has no children
}
//System.out.println(" " + childDoc);
return childDoc;
} else {
// Degenerate but allowed: parent has no children
}
} else {
assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
childDoc++;
if (acceptDocs != null && !acceptDocs.get(childDoc)) {
continue;
}
//System.out.println(" " + childDoc);
return childDoc;
}
} else {
assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
childDoc++;
//System.out.println(" " + childDoc);
return childDoc;
}
}

View File

@ -169,11 +169,14 @@ public class ToParentBlockJoinQuery extends Query {
childWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
}
// NOTE: acceptDocs applies (and is checked) only in the
// parent document space
@Override
public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
// Pass scoreDocsInOrder true, topScorer false to our sub:
final Scorer childScorer = childWeight.scorer(readerContext, true, false, acceptDocs);
final Scorer childScorer = childWeight.scorer(readerContext, true, false, null);
if (childScorer == null) {
// No matches
@ -186,9 +189,13 @@ public class ToParentBlockJoinQuery extends Query {
return null;
}
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader().getLiveDocs());
// TODO: once we do random-access filters we can
// generalize this:
// NOTE: we cannot pass acceptDocs here because this
// will (most likely, justifiably) cause the filter to
// not return a FixedBitSet but rather a
// BitsFilteredDocIdSet. Instead, we filter by
// acceptDocs when we score:
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
if (parents == null) {
// No matches
return null;
@ -197,7 +204,7 @@ public class ToParentBlockJoinQuery extends Query {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode);
return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, acceptDocs);
}
@Override
@ -217,6 +224,7 @@ public class ToParentBlockJoinQuery extends Query {
private final Scorer childScorer;
private final FixedBitSet parentBits;
private final ScoreMode scoreMode;
private final Bits acceptDocs;
private int parentDoc = -1;
private float parentScore;
private int nextChildDoc;
@ -225,12 +233,13 @@ public class ToParentBlockJoinQuery extends Query {
private float[] pendingChildScores;
private int childDocUpto;
public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) {
super(weight);
//System.out.println("Q.init firstChildDoc=" + firstChildDoc);
this.parentBits = parentBits;
this.childScorer = childScorer;
this.scoreMode = scoreMode;
this.acceptDocs = acceptDocs;
if (scoreMode != ScoreMode.None) {
pendingChildScores = new float[5];
}
@ -273,60 +282,75 @@ public class ToParentBlockJoinQuery extends Query {
public int nextDoc() throws IOException {
//System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
if (nextChildDoc == NO_MORE_DOCS) {
//System.out.println(" end");
return parentDoc = NO_MORE_DOCS;
// Loop until we hit a parentDoc that's accepted
while (true) {
if (nextChildDoc == NO_MORE_DOCS) {
//System.out.println(" end");
return parentDoc = NO_MORE_DOCS;
}
// Gather all children sharing the same parent as
// nextChildDoc
parentDoc = parentBits.nextSetBit(nextChildDoc);
//System.out.println(" parentDoc=" + parentDoc);
assert parentDoc != -1;
//System.out.println(" nextChildDoc=" + nextChildDoc);
if (acceptDocs != null && !acceptDocs.get(parentDoc)) {
// Parent doc not accepted; skip child docs until
// we hit a new parent doc:
do {
nextChildDoc = childScorer.nextDoc();
} while (nextChildDoc < parentDoc);
continue;
}
float totalScore = 0;
float maxScore = Float.NEGATIVE_INFINITY;
childDocUpto = 0;
do {
//System.out.println(" c=" + nextChildDoc);
if (pendingChildDocs.length == childDocUpto) {
pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
}
if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
pendingChildScores = ArrayUtil.grow(pendingChildScores);
}
pendingChildDocs[childDocUpto] = nextChildDoc;
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
pendingChildScores[childDocUpto] = childScore;
maxScore = Math.max(childScore, maxScore);
totalScore += childScore;
}
childDocUpto++;
nextChildDoc = childScorer.nextDoc();
} while (nextChildDoc < parentDoc);
// Parent & child docs are supposed to be orthogonal:
assert nextChildDoc != parentDoc;
switch(scoreMode) {
case Avg:
parentScore = totalScore / childDocUpto;
break;
case Max:
parentScore = maxScore;
break;
case Total:
parentScore = totalScore;
break;
case None:
break;
}
//System.out.println(" return parentDoc=" + parentDoc);
return parentDoc;
}
// Gather all children sharing the same parent as nextChildDoc
parentDoc = parentBits.nextSetBit(nextChildDoc);
//System.out.println(" parentDoc=" + parentDoc);
assert parentDoc != -1;
float totalScore = 0;
float maxScore = Float.NEGATIVE_INFINITY;
childDocUpto = 0;
do {
//System.out.println(" c=" + nextChildDoc);
if (pendingChildDocs.length == childDocUpto) {
pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
}
if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
pendingChildScores = ArrayUtil.grow(pendingChildScores);
}
pendingChildDocs[childDocUpto] = nextChildDoc;
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
pendingChildScores[childDocUpto] = childScore;
maxScore = Math.max(childScore, maxScore);
totalScore += childScore;
}
childDocUpto++;
nextChildDoc = childScorer.nextDoc();
} while (nextChildDoc < parentDoc);
//System.out.println(" nextChildDoc=" + nextChildDoc);
// Parent & child docs are supposed to be orthogonal:
assert nextChildDoc != parentDoc;
switch(scoreMode) {
case Avg:
parentScore = totalScore / childDocUpto;
break;
case Max:
parentScore = maxScore;
break;
case Total:
parentScore = totalScore;
break;
case None:
break;
}
//System.out.println(" return parentDoc=" + parentDoc);
return parentDoc;
}
@Override

View File

@ -151,10 +151,75 @@ public class TestBlockJoin extends LuceneTestCase {
assertEquals("java", childDoc.get("skill"));
assertEquals(2007, ((StoredField) childDoc.getField("year")).numericValue());
assertEquals("Lisa", getParentDoc(r, parentsFilter, hits.scoreDocs[0].doc).get("name"));
// Test with filter on child docs:
assertEquals(0, s.search(fullChildQuery,
new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))),
1).totalHits);
r.close();
dir.close();
}
public void testSimpleFilter() throws Exception {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random, dir);
final List<Document> docs = new ArrayList<Document>();
docs.add(makeJob("java", 2007));
docs.add(makeJob("python", 2010));
docs.add(makeResume("Lisa", "United Kingdom"));
w.addDocuments(docs);
docs.clear();
docs.add(makeJob("ruby", 2005));
docs.add(makeJob("java", 2006));
docs.add(makeResume("Frank", "United States"));
w.addDocuments(docs);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childQuery = new BooleanQuery();
childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));
// Define parent document criteria (find a resident in the UK)
Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
// Wrap the child document query to 'join' any matches
// up to corresponding parent:
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
assertEquals("no filter - both passed", 2, s.search(childJoinQuery, 10).totalHits);
assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, parentsFilter, 10).totalHits);
assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).totalHits);
// not found test
assertEquals("noone live there", 0, s.search(childJoinQuery, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
assertEquals("noone live there", 0, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).totalHits);
// apply the UK filter by the searcher
TopDocs ukOnly = s.search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
assertEquals("has filter - single passed", 1, ukOnly.totalHits);
assertEquals( "Lisa", r.document(ukOnly.scoreDocs[0].doc).get("name"));
// looking for US candidates
TopDocs usThen = s.search(childJoinQuery , new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
assertEquals("has filter - single passed", 1, usThen.totalHits);
assertEquals("Frank", r.document(usThen.scoreDocs[0].doc).get("name"));
r.close();
dir.close();
}
private Document getParentDoc(IndexReader reader, Filter parents, int childDocID) throws IOException {
final AtomicReaderContext[] leaves = reader.getTopReaderContext().leaves();
final int subIndex = ReaderUtil.subIndex(childDocID, leaves);
@ -241,6 +306,9 @@ public class TestBlockJoin extends LuceneTestCase {
// Values for child fields:
final String[][] childFields = getRandomFields(numParentDocs);
final boolean doDeletes = random.nextBoolean();
final List<Integer> toDelete = new ArrayList<Integer>();
// TODO: parallel star join, nested join cases too!
final RandomIndexWriter w = new RandomIndexWriter(random, dir);
final RandomIndexWriter joinW = new RandomIndexWriter(random, joinDir);
@ -261,6 +329,11 @@ public class TestBlockJoin extends LuceneTestCase {
}
}
if (doDeletes) {
parentDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
parentJoinDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
}
final List<Document> joinDocs = new ArrayList<Document>();
if (VERBOSE) {
@ -308,12 +381,28 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println(" " + sb.toString());
}
if (doDeletes) {
joinChildDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
}
w.addDocument(childDoc);
}
// Parent last:
joinDocs.add(parentJoinDoc);
joinW.addDocuments(joinDocs);
if (doDeletes && random.nextInt(30) == 7) {
toDelete.add(parentDocID);
}
}
for(int deleteID : toDelete) {
if (VERBOSE) {
System.out.println("DELETE parentID=" + deleteID);
}
w.deleteDocuments(new Term("blockID", ""+deleteID));
joinW.deleteDocuments(new Term("blockID", ""+deleteID));
}
final IndexReader r = w.getReader();