mirror of https://github.com/apache/lucene.git
SOLR-3076: fix BJQ to handle incoming liveDocs/filter correctly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1242934 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
70a7d4975f
commit
bea8fd0fb6
|
@ -698,7 +698,10 @@ Bug fixes
|
|||
|
||||
* LUCENE-3589: BytesRef copy(short) didnt set length.
|
||||
(Peter Chang via Robert Muir)
|
||||
|
||||
|
||||
* SOLR-3076: ToParent/ChildBlockJoinQuery was not handling an incoming
|
||||
filter nor deleted docs correctly (Mikhail Khludnev via Mike
|
||||
McCandless).
|
||||
|
||||
======================= Lucene 3.6.0 =======================
|
||||
|
||||
|
|
|
@ -109,20 +109,27 @@ public class ToChildBlockJoinQuery extends Query {
|
|||
parentWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
|
||||
}
|
||||
|
||||
// NOTE: acceptDocs applies (and is checked) only in the
|
||||
// child document space
|
||||
@Override
|
||||
public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
|
||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
|
||||
// Pass scoreDocsInOrder true, topScorer false to our sub:
|
||||
final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, acceptDocs);
|
||||
final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, null);
|
||||
|
||||
if (parentScorer == null) {
|
||||
// No matches
|
||||
return null;
|
||||
}
|
||||
|
||||
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader().getLiveDocs());
|
||||
// TODO: once we do random-access filters we can
|
||||
// generalize this:
|
||||
// NOTE: we cannot pass acceptDocs here because this
|
||||
// will (most likely, justifiably) cause the filter to
|
||||
// not return a FixedBitSet but rather a
|
||||
// BitsFilteredDocIdSet. Instead, we filter by
|
||||
// acceptDocs when we score:
|
||||
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
|
||||
|
||||
if (parents == null) {
|
||||
// No matches
|
||||
return null;
|
||||
|
@ -131,7 +138,7 @@ public class ToChildBlockJoinQuery extends Query {
|
|||
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
|
||||
}
|
||||
|
||||
return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores);
|
||||
return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores, acceptDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -151,16 +158,19 @@ public class ToChildBlockJoinQuery extends Query {
|
|||
private final Scorer parentScorer;
|
||||
private final FixedBitSet parentBits;
|
||||
private final boolean doScores;
|
||||
private final Bits acceptDocs;
|
||||
|
||||
private float parentScore;
|
||||
|
||||
private int childDoc = -1;
|
||||
private int parentDoc;
|
||||
|
||||
public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores) {
|
||||
public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores, Bits acceptDocs) {
|
||||
super(weight);
|
||||
this.doScores = doScores;
|
||||
this.parentBits = parentBits;
|
||||
this.parentScorer = parentScorer;
|
||||
this.acceptDocs = acceptDocs;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -172,45 +182,56 @@ public class ToChildBlockJoinQuery extends Query {
|
|||
public int nextDoc() throws IOException {
|
||||
//System.out.println("Q.nextDoc() parentDoc=" + parentDoc + " childDoc=" + childDoc);
|
||||
|
||||
if (childDoc+1 == parentDoc) {
|
||||
// OK, we are done iterating through all children
|
||||
// matching this one parent doc, so we now nextDoc()
|
||||
// the parent. Use a while loop because we may have
|
||||
// to skip over some number of parents w/ no
|
||||
// children:
|
||||
while (true) {
|
||||
parentDoc = parentScorer.nextDoc();
|
||||
if (parentDoc == 0) {
|
||||
// Degenerate but allowed: parent has no children
|
||||
// TODO: would be nice to pull initial parent
|
||||
// into ctor so we can skip this if... but it's
|
||||
// tricky because scorer must return -1 for
|
||||
// .doc() on init...
|
||||
// Loop until we hit a childDoc that's accepted
|
||||
while (true) {
|
||||
if (childDoc+1 == parentDoc) {
|
||||
// OK, we are done iterating through all children
|
||||
// matching this one parent doc, so we now nextDoc()
|
||||
// the parent. Use a while loop because we may have
|
||||
// to skip over some number of parents w/ no
|
||||
// children:
|
||||
while (true) {
|
||||
parentDoc = parentScorer.nextDoc();
|
||||
}
|
||||
|
||||
if (parentDoc == NO_MORE_DOCS) {
|
||||
childDoc = NO_MORE_DOCS;
|
||||
//System.out.println(" END");
|
||||
return childDoc;
|
||||
}
|
||||
|
||||
childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
|
||||
if (childDoc < parentDoc) {
|
||||
if (doScores) {
|
||||
parentScore = parentScorer.score();
|
||||
if (parentDoc == 0) {
|
||||
// Degenerate but allowed: parent has no children
|
||||
// TODO: would be nice to pull initial parent
|
||||
// into ctor so we can skip this if... but it's
|
||||
// tricky because scorer must return -1 for
|
||||
// .doc() on init...
|
||||
parentDoc = parentScorer.nextDoc();
|
||||
}
|
||||
|
||||
if (parentDoc == NO_MORE_DOCS) {
|
||||
childDoc = NO_MORE_DOCS;
|
||||
//System.out.println(" END");
|
||||
return childDoc;
|
||||
}
|
||||
|
||||
childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
|
||||
|
||||
if (acceptDocs != null && !acceptDocs.get(childDoc)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (childDoc < parentDoc) {
|
||||
if (doScores) {
|
||||
parentScore = parentScorer.score();
|
||||
}
|
||||
//System.out.println(" " + childDoc);
|
||||
return childDoc;
|
||||
} else {
|
||||
// Degenerate but allowed: parent has no children
|
||||
}
|
||||
//System.out.println(" " + childDoc);
|
||||
return childDoc;
|
||||
} else {
|
||||
// Degenerate but allowed: parent has no children
|
||||
}
|
||||
} else {
|
||||
assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
|
||||
childDoc++;
|
||||
if (acceptDocs != null && !acceptDocs.get(childDoc)) {
|
||||
continue;
|
||||
}
|
||||
//System.out.println(" " + childDoc);
|
||||
return childDoc;
|
||||
}
|
||||
} else {
|
||||
assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
|
||||
childDoc++;
|
||||
//System.out.println(" " + childDoc);
|
||||
return childDoc;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -169,11 +169,14 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
childWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
|
||||
}
|
||||
|
||||
// NOTE: acceptDocs applies (and is checked) only in the
|
||||
// parent document space
|
||||
@Override
|
||||
public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
|
||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
|
||||
// Pass scoreDocsInOrder true, topScorer false to our sub:
|
||||
final Scorer childScorer = childWeight.scorer(readerContext, true, false, acceptDocs);
|
||||
final Scorer childScorer = childWeight.scorer(readerContext, true, false, null);
|
||||
|
||||
if (childScorer == null) {
|
||||
// No matches
|
||||
|
@ -186,9 +189,13 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
return null;
|
||||
}
|
||||
|
||||
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader().getLiveDocs());
|
||||
// TODO: once we do random-access filters we can
|
||||
// generalize this:
|
||||
// NOTE: we cannot pass acceptDocs here because this
|
||||
// will (most likely, justifiably) cause the filter to
|
||||
// not return a FixedBitSet but rather a
|
||||
// BitsFilteredDocIdSet. Instead, we filter by
|
||||
// acceptDocs when we score:
|
||||
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
|
||||
|
||||
if (parents == null) {
|
||||
// No matches
|
||||
return null;
|
||||
|
@ -197,7 +204,7 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
|
||||
}
|
||||
|
||||
return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode);
|
||||
return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, acceptDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -217,6 +224,7 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
private final Scorer childScorer;
|
||||
private final FixedBitSet parentBits;
|
||||
private final ScoreMode scoreMode;
|
||||
private final Bits acceptDocs;
|
||||
private int parentDoc = -1;
|
||||
private float parentScore;
|
||||
private int nextChildDoc;
|
||||
|
@ -225,12 +233,13 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
private float[] pendingChildScores;
|
||||
private int childDocUpto;
|
||||
|
||||
public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
|
||||
public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) {
|
||||
super(weight);
|
||||
//System.out.println("Q.init firstChildDoc=" + firstChildDoc);
|
||||
this.parentBits = parentBits;
|
||||
this.childScorer = childScorer;
|
||||
this.scoreMode = scoreMode;
|
||||
this.acceptDocs = acceptDocs;
|
||||
if (scoreMode != ScoreMode.None) {
|
||||
pendingChildScores = new float[5];
|
||||
}
|
||||
|
@ -273,60 +282,75 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
public int nextDoc() throws IOException {
|
||||
//System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
|
||||
|
||||
if (nextChildDoc == NO_MORE_DOCS) {
|
||||
//System.out.println(" end");
|
||||
return parentDoc = NO_MORE_DOCS;
|
||||
// Loop until we hit a parentDoc that's accepted
|
||||
while (true) {
|
||||
if (nextChildDoc == NO_MORE_DOCS) {
|
||||
//System.out.println(" end");
|
||||
return parentDoc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
// Gather all children sharing the same parent as
|
||||
// nextChildDoc
|
||||
|
||||
parentDoc = parentBits.nextSetBit(nextChildDoc);
|
||||
|
||||
//System.out.println(" parentDoc=" + parentDoc);
|
||||
assert parentDoc != -1;
|
||||
|
||||
//System.out.println(" nextChildDoc=" + nextChildDoc);
|
||||
if (acceptDocs != null && !acceptDocs.get(parentDoc)) {
|
||||
// Parent doc not accepted; skip child docs until
|
||||
// we hit a new parent doc:
|
||||
do {
|
||||
nextChildDoc = childScorer.nextDoc();
|
||||
} while (nextChildDoc < parentDoc);
|
||||
continue;
|
||||
}
|
||||
|
||||
float totalScore = 0;
|
||||
float maxScore = Float.NEGATIVE_INFINITY;
|
||||
|
||||
childDocUpto = 0;
|
||||
do {
|
||||
//System.out.println(" c=" + nextChildDoc);
|
||||
if (pendingChildDocs.length == childDocUpto) {
|
||||
pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
|
||||
}
|
||||
if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
|
||||
pendingChildScores = ArrayUtil.grow(pendingChildScores);
|
||||
}
|
||||
pendingChildDocs[childDocUpto] = nextChildDoc;
|
||||
if (scoreMode != ScoreMode.None) {
|
||||
// TODO: specialize this into dedicated classes per-scoreMode
|
||||
final float childScore = childScorer.score();
|
||||
pendingChildScores[childDocUpto] = childScore;
|
||||
maxScore = Math.max(childScore, maxScore);
|
||||
totalScore += childScore;
|
||||
}
|
||||
childDocUpto++;
|
||||
nextChildDoc = childScorer.nextDoc();
|
||||
} while (nextChildDoc < parentDoc);
|
||||
|
||||
// Parent & child docs are supposed to be orthogonal:
|
||||
assert nextChildDoc != parentDoc;
|
||||
|
||||
switch(scoreMode) {
|
||||
case Avg:
|
||||
parentScore = totalScore / childDocUpto;
|
||||
break;
|
||||
case Max:
|
||||
parentScore = maxScore;
|
||||
break;
|
||||
case Total:
|
||||
parentScore = totalScore;
|
||||
break;
|
||||
case None:
|
||||
break;
|
||||
}
|
||||
|
||||
//System.out.println(" return parentDoc=" + parentDoc);
|
||||
return parentDoc;
|
||||
}
|
||||
|
||||
// Gather all children sharing the same parent as nextChildDoc
|
||||
parentDoc = parentBits.nextSetBit(nextChildDoc);
|
||||
//System.out.println(" parentDoc=" + parentDoc);
|
||||
assert parentDoc != -1;
|
||||
|
||||
float totalScore = 0;
|
||||
float maxScore = Float.NEGATIVE_INFINITY;
|
||||
|
||||
childDocUpto = 0;
|
||||
do {
|
||||
//System.out.println(" c=" + nextChildDoc);
|
||||
if (pendingChildDocs.length == childDocUpto) {
|
||||
pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
|
||||
}
|
||||
if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
|
||||
pendingChildScores = ArrayUtil.grow(pendingChildScores);
|
||||
}
|
||||
pendingChildDocs[childDocUpto] = nextChildDoc;
|
||||
if (scoreMode != ScoreMode.None) {
|
||||
// TODO: specialize this into dedicated classes per-scoreMode
|
||||
final float childScore = childScorer.score();
|
||||
pendingChildScores[childDocUpto] = childScore;
|
||||
maxScore = Math.max(childScore, maxScore);
|
||||
totalScore += childScore;
|
||||
}
|
||||
childDocUpto++;
|
||||
nextChildDoc = childScorer.nextDoc();
|
||||
} while (nextChildDoc < parentDoc);
|
||||
//System.out.println(" nextChildDoc=" + nextChildDoc);
|
||||
|
||||
// Parent & child docs are supposed to be orthogonal:
|
||||
assert nextChildDoc != parentDoc;
|
||||
|
||||
switch(scoreMode) {
|
||||
case Avg:
|
||||
parentScore = totalScore / childDocUpto;
|
||||
break;
|
||||
case Max:
|
||||
parentScore = maxScore;
|
||||
break;
|
||||
case Total:
|
||||
parentScore = totalScore;
|
||||
break;
|
||||
case None:
|
||||
break;
|
||||
}
|
||||
|
||||
//System.out.println(" return parentDoc=" + parentDoc);
|
||||
return parentDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -151,10 +151,75 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
assertEquals("java", childDoc.get("skill"));
|
||||
assertEquals(2007, ((StoredField) childDoc.getField("year")).numericValue());
|
||||
assertEquals("Lisa", getParentDoc(r, parentsFilter, hits.scoreDocs[0].doc).get("name"));
|
||||
|
||||
// Test with filter on child docs:
|
||||
assertEquals(0, s.search(fullChildQuery,
|
||||
new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))),
|
||||
1).totalHits);
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSimpleFilter() throws Exception {
|
||||
|
||||
final Directory dir = newDirectory();
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random, dir);
|
||||
|
||||
final List<Document> docs = new ArrayList<Document>();
|
||||
|
||||
docs.add(makeJob("java", 2007));
|
||||
docs.add(makeJob("python", 2010));
|
||||
docs.add(makeResume("Lisa", "United Kingdom"));
|
||||
w.addDocuments(docs);
|
||||
|
||||
docs.clear();
|
||||
docs.add(makeJob("ruby", 2005));
|
||||
docs.add(makeJob("java", 2006));
|
||||
docs.add(makeResume("Frank", "United States"));
|
||||
w.addDocuments(docs);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
IndexSearcher s = newSearcher(r);
|
||||
|
||||
// Create a filter that defines "parent" documents in the index - in this case resumes
|
||||
Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
|
||||
|
||||
// Define child document criteria (finds an example of relevant work experience)
|
||||
BooleanQuery childQuery = new BooleanQuery();
|
||||
childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
|
||||
childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));
|
||||
|
||||
// Define parent document criteria (find a resident in the UK)
|
||||
Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
|
||||
|
||||
// Wrap the child document query to 'join' any matches
|
||||
// up to corresponding parent:
|
||||
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
|
||||
|
||||
assertEquals("no filter - both passed", 2, s.search(childJoinQuery, 10).totalHits);
|
||||
|
||||
assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, parentsFilter, 10).totalHits);
|
||||
assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).totalHits);
|
||||
|
||||
// not found test
|
||||
assertEquals("noone live there", 0, s.search(childJoinQuery, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
|
||||
assertEquals("noone live there", 0, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).totalHits);
|
||||
|
||||
// apply the UK filter by the searcher
|
||||
TopDocs ukOnly = s.search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
|
||||
assertEquals("has filter - single passed", 1, ukOnly.totalHits);
|
||||
assertEquals( "Lisa", r.document(ukOnly.scoreDocs[0].doc).get("name"));
|
||||
|
||||
// looking for US candidates
|
||||
TopDocs usThen = s.search(childJoinQuery , new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
|
||||
assertEquals("has filter - single passed", 1, usThen.totalHits);
|
||||
assertEquals("Frank", r.document(usThen.scoreDocs[0].doc).get("name"));
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private Document getParentDoc(IndexReader reader, Filter parents, int childDocID) throws IOException {
|
||||
final AtomicReaderContext[] leaves = reader.getTopReaderContext().leaves();
|
||||
final int subIndex = ReaderUtil.subIndex(childDocID, leaves);
|
||||
|
@ -241,6 +306,9 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
// Values for child fields:
|
||||
final String[][] childFields = getRandomFields(numParentDocs);
|
||||
|
||||
final boolean doDeletes = random.nextBoolean();
|
||||
final List<Integer> toDelete = new ArrayList<Integer>();
|
||||
|
||||
// TODO: parallel star join, nested join cases too!
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random, dir);
|
||||
final RandomIndexWriter joinW = new RandomIndexWriter(random, joinDir);
|
||||
|
@ -261,6 +329,11 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
if (doDeletes) {
|
||||
parentDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
|
||||
parentJoinDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
|
||||
}
|
||||
|
||||
final List<Document> joinDocs = new ArrayList<Document>();
|
||||
|
||||
if (VERBOSE) {
|
||||
|
@ -308,12 +381,28 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
System.out.println(" " + sb.toString());
|
||||
}
|
||||
|
||||
if (doDeletes) {
|
||||
joinChildDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
|
||||
}
|
||||
|
||||
w.addDocument(childDoc);
|
||||
}
|
||||
|
||||
// Parent last:
|
||||
joinDocs.add(parentJoinDoc);
|
||||
joinW.addDocuments(joinDocs);
|
||||
|
||||
if (doDeletes && random.nextInt(30) == 7) {
|
||||
toDelete.add(parentDocID);
|
||||
}
|
||||
}
|
||||
|
||||
for(int deleteID : toDelete) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("DELETE parentID=" + deleteID);
|
||||
}
|
||||
w.deleteDocuments(new Term("blockID", ""+deleteID));
|
||||
joinW.deleteDocuments(new Term("blockID", ""+deleteID));
|
||||
}
|
||||
|
||||
final IndexReader r = w.getReader();
|
||||
|
|
Loading…
Reference in New Issue