Lucene 4: upgraded the o.e.index.search.nested package. Also fixed an issue with liveDocs in the child package.

Authored by Martijn van Groningen on 2012-10-28 17:04:10 +01:00; committed by Shay Banon.
parent a49078dfc1
commit cdf1fc8981
8 changed files with 165 additions and 120 deletions

org/elasticsearch/index/search/child/HasChildFilter.java

@@ -178,7 +178,7 @@ public abstract class HasChildFilter extends Filter implements ScopePhase.Collec
        }
        public boolean get(int doc) {
-            return !acceptDocs.get(doc) && parents.contains(typeCache.idByDoc(doc));
+            return acceptDocs.get(doc) && parents.contains(typeCache.idByDoc(doc));
        }
    }
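Note: this fix, and the matching ones in HasParentFilter and TopChildrenQuery below, hinge on Lucene 4's inverted deletion semantics: the 3.x IndexReader.isDeleted(doc) returned true for deleted documents, while the liveDocs/acceptDocs Bits that replace it return true for documents that may still be matched. A minimal sketch of the new contract (illustrative only; the class and helper are hypothetical, not code from this commit):

    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.util.Bits;

    class LiveDocsSketch {
        // Lucene 4: getLiveDocs() returns null for a segment with no deletions,
        // so callers normally need a null guard before consulting the bits.
        static boolean isLive(AtomicReader reader, int docId) {
            Bits liveDocs = reader.getLiveDocs();
            return liveDocs == null || liveDocs.get(docId);
        }
    }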

org/elasticsearch/index/search/child/HasParentFilter.java

@@ -143,7 +143,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle
        }
        public boolean get(int doc) {
-            return !acceptDocs.get(doc) && parents.contains(idReaderTypeCache.parentIdByDoc(doc));
+            return acceptDocs.get(doc) && parents.contains(idReaderTypeCache.parentIdByDoc(doc));
        }
    }
@@ -229,7 +229,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle
        }
        public boolean get(int doc) {
-            if (acceptDocs.get(doc) || doc == -1) {
+            if (!acceptDocs.get(doc) || doc == -1) {
                return false;
            }

org/elasticsearch/index/search/child/TopChildrenQuery.java

@@ -138,7 +138,7 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase {
        for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) {
            AtomicReader indexReader = atomicReaderContext.reader();
            int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId);
-            if (parentDocId != -1 && !indexReader.getLiveDocs().get(parentDocId)) {
+            if (parentDocId != -1 && indexReader.getLiveDocs().get(parentDocId)) {
                // we found a match, add it and break
                TIntObjectHashMap<ParentDoc> readerParentDocs = parentDocsPerReader.get(indexReader.getCoreCacheKey());

org/elasticsearch/index/search/nested/BlockJoinQuery.java

@@ -19,15 +19,20 @@
package org.elasticsearch.index.search.nested;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.docset.FixedBitDocSet;
import org.elasticsearch.common.lucene.search.NoopCollector;
import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Locale;
import java.util.Set;
/**
@@ -75,8 +80,6 @@ public class BlockJoinQuery extends Query {
public static enum ScoreMode {None, Avg, Max, Total}
;
private final Filter parentsFilter;
private final Query childQuery;
@@ -112,7 +115,7 @@ public class BlockJoinQuery extends Query {
    }
    @Override
-    public Weight createWeight(Searcher searcher) throws IOException {
+    public Weight createWeight(IndexSearcher searcher) throws IOException {
        return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode, childCollector);
    }
@@ -138,24 +141,19 @@ public class BlockJoinQuery extends Query {
        }
        @Override
-        public float getValue() {
-            return childWeight.getValue();
+        public float getValueForNormalization() throws IOException {
+            return childWeight.getValueForNormalization() * joinQuery.getBoost() * joinQuery.getBoost();
        }
        @Override
-        public float sumOfSquaredWeights() throws IOException {
-            return childWeight.sumOfSquaredWeights() * joinQuery.getBoost() * joinQuery.getBoost();
+        public void normalize(float norm, float topLevelBoost) {
+            childWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
        }
-        @Override
-        public void normalize(float norm) {
-            childWeight.normalize(norm * joinQuery.getBoost());
-        }
        @Override
-        public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
            // Pass scoreDocsInOrder true, topScorer false to our sub:
-            final Scorer childScorer = childWeight.scorer(reader, true, false);
+            final Scorer childScorer = childWeight.scorer(context, true, false, null);
            if (childScorer == null) {
                // No matches
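Background for the two hunks above: Lucene 4.0 reshaped the Weight contract, renaming sumOfSquaredWeights() to getValueForNormalization(), folding boost propagation into a two-argument normalize(), and making scorer() per-segment with explicit acceptDocs. Paraphrased from the 4.0 API as a sketch (not code from this commit):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.Explanation;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.util.Bits;

    public abstract class Weight {
        public abstract Explanation explain(AtomicReaderContext context, int doc) throws IOException;
        public abstract Query getQuery();
        public abstract float getValueForNormalization() throws IOException; // was sumOfSquaredWeights()
        public abstract void normalize(float norm, float topLevelBoost);     // was normalize(float)
        // Per-segment reader context and acceptDocs replace the 3.x top-level IndexReader:
        public abstract Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
                                      boolean topScorer, Bits acceptDocs) throws IOException;
    }

getValue() disappears because boosts now travel through getValueForNormalization() and the topLevelBoost argument, which is the rewrite applied to BlockJoinWeight above.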
@@ -168,7 +166,7 @@ public class BlockJoinQuery extends Query {
                return null;
            }
-            DocIdSet parents = parentsFilter.getDocIdSet(reader);
+            DocIdSet parents = parentsFilter.getDocIdSet(context, null);
            // TODO NESTED: We have random access in ES, not sure I understand what can be gain?
            // TODO: once we do random-access filters we can
            // generalize this:
@@ -186,18 +184,22 @@ public class BlockJoinQuery extends Query {
            // CHANGE:
            if (childCollector != null) {
-                childCollector.setNextReader(reader, 0);
+                childCollector.setNextReader(context);
                childCollector.setScorer(childScorer);
            }
-            return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, childCollector);
+            return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, childCollector, acceptDocs);
        }
        @Override
-        public Explanation explain(IndexReader reader, int doc) throws IOException {
-            // TODO
-            throw new UnsupportedOperationException(getClass().getName() +
-                " cannot explain match on parent document");
+        public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
+            BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs());
+            if (scorer != null) {
+                if (scorer.advance(doc) == doc) {
+                    return scorer.explain(context.docBase);
+                }
+            }
+            return new ComplexExplanation(false, 0.0f, "Not a match");
        }
@Override
@@ -210,21 +212,25 @@ public class BlockJoinQuery extends Query {
        private final Scorer childScorer;
        private final FixedBitSet parentBits;
        private final ScoreMode scoreMode;
+        private final Bits acceptDocs; // LUCENE 4 UPGRADE: Why not make the parentBits already be filtered by acceptDocs?
        private final Collector childCollector;
        private int parentDoc = -1;
+        private int prevParentDoc;
        private float parentScore;
+        private float parentFreq;
        private int nextChildDoc;
        private int[] pendingChildDocs = new int[5];
        private float[] pendingChildScores;
        private int childDocUpto;
-        public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Collector childCollector) {
+        public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Collector childCollector, Bits acceptDocs) {
            super(weight);
            //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
            this.parentBits = parentBits;
            this.childScorer = childScorer;
            this.scoreMode = scoreMode;
+            this.acceptDocs = acceptDocs;
            this.childCollector = childCollector;
            if (scoreMode != ScoreMode.None) {
                pendingChildScores = new float[5];
@@ -233,11 +239,8 @@ public class BlockJoinQuery extends Query {
        }
        @Override
-        public void visitSubScorers(Query parent, BooleanClause.Occur relationship,
-                                    ScorerVisitor<Query, Query, Scorer> visitor) {
-            super.visitSubScorers(parent, relationship, visitor);
-            //childScorer.visitSubScorers(weight.getQuery(), BooleanClause.Occur.MUST, visitor);
-            childScorer.visitScorers(visitor);
+        public Collection<ChildScorer> getChildren() {
+            return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
        }
        int getChildCount() {
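The visitSubScorers()/ScorerVisitor API was dropped in Lucene 4 in favor of Scorer.getChildren(), which exposes sub-scorers as ChildScorer entries, each a scorer plus a free-form relationship label ("BLOCK_JOIN" above). A hypothetical caller of the replacement API, for illustration:

    // Walk one level of a scorer tree (sketch; 'scorer' is any Lucene 4 Scorer):
    for (Scorer.ChildScorer child : scorer.getChildren()) {
        System.out.println(child.relationship + " -> " + child.child.getClass().getSimpleName());
    }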
@@ -271,64 +274,88 @@ public class BlockJoinQuery extends Query {
        public int nextDoc() throws IOException {
            //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
-            if (nextChildDoc == NO_MORE_DOCS) {
-                //System.out.println("  end");
-                return parentDoc = NO_MORE_DOCS;
-            }
-            // Gather all children sharing the same parent as nextChildDoc
-            parentDoc = parentBits.nextSetBit(nextChildDoc);
-            //System.out.println("  parentDoc=" + parentDoc);
-            assert parentDoc != -1;
-            float totalScore = 0;
-            float maxScore = Float.NEGATIVE_INFINITY;
-            childDocUpto = 0;
-            do {
-                //System.out.println("  c=" + nextChildDoc);
-                if (pendingChildDocs.length == childDocUpto) {
-                    pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
-                    if (scoreMode != ScoreMode.None) {
-                        pendingChildScores = ArrayUtil.grow(pendingChildScores);
-                    }
-                }
-                pendingChildDocs[childDocUpto] = nextChildDoc;
-                if (scoreMode != ScoreMode.None) {
-                    // TODO: specialize this into dedicated classes per-scoreMode
-                    final float childScore = childScorer.score();
-                    pendingChildScores[childDocUpto] = childScore;
-                    maxScore = Math.max(childScore, maxScore);
-                    totalScore += childScore;
-                }
-                // CHANGE:
-                childCollector.collect(nextChildDoc);
-                childDocUpto++;
-                nextChildDoc = childScorer.nextDoc();
-            } while (nextChildDoc < parentDoc);
-            //System.out.println("  nextChildDoc=" + nextChildDoc);
-            // Parent & child docs are supposed to be orthogonal:
-            assert nextChildDoc != parentDoc;
-            switch (scoreMode) {
-                case Avg:
-                    parentScore = totalScore / childDocUpto;
-                    break;
-                case Max:
-                    parentScore = maxScore;
-                    break;
-                case Total:
-                    parentScore = totalScore;
-                    break;
-                case None:
-                    break;
-            }
-            //System.out.println("  return parentDoc=" + parentDoc);
-            return parentDoc;
+            // Loop until we hit a parentDoc that's accepted
+            while (true) {
+                if (nextChildDoc == NO_MORE_DOCS) {
+                    //System.out.println("  end");
+                    return parentDoc = NO_MORE_DOCS;
+                }
+                // Gather all children sharing the same parent as
+                // nextChildDoc
+                parentDoc = parentBits.nextSetBit(nextChildDoc);
+                //System.out.println("  parentDoc=" + parentDoc);
+                assert parentDoc != -1;
+                //System.out.println("  nextChildDoc=" + nextChildDoc);
+                if (acceptDocs != null && !acceptDocs.get(parentDoc)) {
+                    // Parent doc not accepted; skip child docs until
+                    // we hit a new parent doc:
+                    do {
+                        nextChildDoc = childScorer.nextDoc();
+                    } while (nextChildDoc < parentDoc);
+                    continue;
+                }
+                float totalScore = 0;
+                float totalFreq = 0;
+                float maxScore = Float.NEGATIVE_INFINITY;
+                float maxFreq = 0;
+                childDocUpto = 0;
+                do {
+                    //System.out.println("  c=" + nextChildDoc);
+                    if (pendingChildDocs.length == childDocUpto) {
+                        pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
+                    }
+                    if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
+                        pendingChildScores = ArrayUtil.grow(pendingChildScores);
+                    }
+                    pendingChildDocs[childDocUpto] = nextChildDoc;
+                    if (scoreMode != ScoreMode.None) {
+                        // TODO: specialize this into dedicated classes per-scoreMode
+                        final float childScore = childScorer.score();
+                        final float childFreq = childScorer.freq();
+                        pendingChildScores[childDocUpto] = childScore;
+                        maxScore = Math.max(childScore, maxScore);
+                        maxFreq = Math.max(childFreq, maxFreq);
+                        totalScore += childScore;
+                        totalFreq += childFreq;
+                    }
+                    // CHANGE:
+                    childCollector.collect(nextChildDoc);
+                    childDocUpto++;
+                    nextChildDoc = childScorer.nextDoc();
+                } while (nextChildDoc < parentDoc);
+                // Parent & child docs are supposed to be orthogonal:
+                assert nextChildDoc != parentDoc;
+                switch(scoreMode) {
+                    case Avg:
+                        parentScore = totalScore / childDocUpto;
+                        parentFreq = totalFreq / childDocUpto;
+                        break;
+                    case Max:
+                        parentScore = maxScore;
+                        parentFreq = maxFreq;
+                        break;
+                    case Total:
+                        parentScore = totalScore;
+                        parentFreq = totalFreq;
+                        break;
+                    case None:
+                        break;
+                }
+                //System.out.println("  return parentDoc=" + parentDoc);
+                return parentDoc;
+            }
        }
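Two things change in nextDoc() above. First, the body is wrapped in a while (true) loop so that a block whose parent document is rejected by acceptDocs is skipped wholesale: the inner do/while fast-forwards the child scorer past the block, then continue retries with the next parent. Second, per-block freq statistics are accumulated alongside the scores to back the new Scorer.freq() override. The ScoreMode arithmetic itself is unchanged; for a block whose children score 1.0 and 3.0, Avg yields 2.0, Max 3.0, Total 4.0, and None leaves parentScore untouched.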
@Override
@@ -341,6 +368,11 @@ public class BlockJoinQuery extends Query {
            return parentScore;
        }
+        @Override
+        public float freq() throws IOException {
+            return parentFreq;
+        }
@Override
public int advance(int parentTarget) throws IOException {
@@ -359,7 +391,7 @@ public class BlockJoinQuery extends Query {
                return nextDoc();
            }
-            final int prevParentDoc = parentBits.prevSetBit(parentTarget - 1);
+            prevParentDoc = parentBits.prevSetBit(parentTarget - 1);
            //System.out.println("  rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
            assert prevParentDoc >= parentDoc;
@@ -377,6 +409,15 @@ public class BlockJoinQuery extends Query {
            //System.out.println("  return nextParentDoc=" + nd);
            return nd;
        }
+        public Explanation explain(int docBase) throws IOException {
+            int start = docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
+            int end = docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+            return new ComplexExplanation(
+                true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", start, end)
+            );
+        }
}
@Override
@@ -427,8 +468,8 @@ public class BlockJoinQuery extends Query {
    }
    @Override
-    public Object clone() {
-        return new BlockJoinQuery((Query) origChildQuery.clone(),
+    public Query clone() {
+        return new BlockJoinQuery(origChildQuery.clone(),
            parentsFilter,
            scoreMode).setCollector(childCollector);
    }
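The prevSetBit()/nextSetBit() navigation in the scorer, and the child-doc range reported by the new explain(int docBase), both rest on the block indexing contract: child documents are indexed first, the parent is the last document of its block, and the block is added atomically so doc IDs stay contiguous. A minimal sketch of writing such a block (illustrative; the wrapper class and addBlock helper are hypothetical, not part of this commit):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;

    class BlockIndexingSketch {
        // Children first, parent last, written atomically so the parent
        // bit set can delimit each block by doc id alone.
        static void addBlock(IndexWriter writer, List<Document> children, Document parent) throws IOException {
            List<Document> block = new ArrayList<Document>(children);
            block.add(parent);
            writer.addDocuments(block);
        }
    }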

org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java

@@ -1,12 +1,15 @@
package org.elasticsearch.index.search.nested;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.docset.FixedBitDocSet;
import java.io.IOException;
+import java.util.Collection;
import java.util.Set;
/**
@@ -48,7 +51,7 @@ public class IncludeNestedDocsQuery extends Query {
    }
    @Override
-    public Weight createWeight(Searcher searcher) throws IOException {
+    public Weight createWeight(IndexSearcher searcher) throws IOException {
        return new IncludeNestedDocsWeight(parentQuery, parentQuery.createWeight(searcher), parentFilter);
    }
@@ -70,30 +73,25 @@ public class IncludeNestedDocsQuery extends Query {
        }
        @Override
-        public float getValue() {
-            return parentWeight.getValue();
+        public void normalize(float norm, float topLevelBoost) {
+            parentWeight.normalize(norm, topLevelBoost);
        }
        @Override
-        public float sumOfSquaredWeights() throws IOException {
-            return parentWeight.sumOfSquaredWeights() * parentQuery.getBoost() * parentQuery.getBoost();
+        public float getValueForNormalization() throws IOException {
+            return parentWeight.getValueForNormalization(); // this query is never boosted so just delegate...
        }
-        @Override
-        public void normalize(float norm) {
-            parentWeight.normalize(norm * parentQuery.getBoost());
-        }
        @Override
-        public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
-            final Scorer parentScorer = parentWeight.scorer(reader, true, false);
+        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+            final Scorer parentScorer = parentWeight.scorer(context, true, false, acceptDocs);
            // no matches
            if (parentScorer == null) {
                return null;
            }
-            DocIdSet parents = parentsFilter.getDocIdSet(reader);
+            DocIdSet parents = parentsFilter.getDocIdSet(context, acceptDocs);
            if (parents == null) {
                // No matches
                return null;
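The same getValue()/sumOfSquaredWeights()/normalize(float) triple collapses into two methods here. For orientation, this is roughly how IndexSearcher drives the renamed hooks in Lucene 4.0 (condensed from memory of createNormalizedWeight; a sketch, not code from this commit):

    Weight weight = query.createWeight(searcher);
    float v = weight.getValueForNormalization();         // was sumOfSquaredWeights()
    float norm = searcher.getSimilarity().queryNorm(v);
    if (Float.isInfinite(norm) || Float.isNaN(norm)) {
        norm = 1.0f;
    }
    weight.normalize(norm, 1.0f);                        // top-level boost starts at 1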
@@ -114,8 +112,8 @@ public class IncludeNestedDocsQuery extends Query {
        }
        @Override
-        public Explanation explain(IndexReader reader, int doc) throws IOException {
-            return null;
+        public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
+            return null; //Query is used internally and not by users, so explain can be empty
        }
@Override
@@ -154,12 +152,10 @@ public class IncludeNestedDocsQuery extends Query {
        }
        @Override
-        public void visitSubScorers(Query parent, BooleanClause.Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
-            super.visitSubScorers(parent, relationship, visitor);
-            parentScorer.visitScorers(visitor);
+        public Collection<ChildScorer> getChildren() {
+            return parentScorer.getChildren();
        }
@Override
public int nextDoc() throws IOException {
if (currentParentPointer == NO_MORE_DOCS) {
return (currentDoc = NO_MORE_DOCS);
@@ -187,7 +183,6 @@ public class IncludeNestedDocsQuery extends Query {
return currentDoc;
}
@Override
public int advance(int target) throws IOException {
if (target == NO_MORE_DOCS) {
return (currentDoc = NO_MORE_DOCS);
@@ -224,6 +219,10 @@ public class IncludeNestedDocsQuery extends Query {
            return parentScorer.score();
        }
+        public float freq() throws IOException {
+            return parentScorer.freq();
+        }
        public int docID() {
            return currentDoc;
        }
@@ -269,8 +268,8 @@ public class IncludeNestedDocsQuery extends Query {
    }
    @Override
-    public Object clone() {
-        Query clonedQuery = (Query) origParentQuery.clone();
+    public Query clone() {
+        Query clonedQuery = origParentQuery.clone();
        return new IncludeNestedDocsQuery(clonedQuery, this);
    }
}

org/elasticsearch/index/search/nested/NestedChildrenCollector.java

@@ -19,6 +19,8 @@
package org.elasticsearch.index.search.nested;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
@@ -47,8 +49,6 @@ public class NestedChildrenCollector extends FacetCollector {
    private FixedBitSet parentDocs;
-    private IndexReader currentReader;
    public NestedChildrenCollector(FacetCollector collector, Filter parentFilter, Filter childFilter) {
        this.collector = collector;
        this.parentFilter = parentFilter;
@@ -72,11 +72,12 @@ public class NestedChildrenCollector extends FacetCollector {
    }
    @Override
-    public void setNextReader(IndexReader reader, int docBase) throws IOException {
-        collector.setNextReader(reader, docBase);
-        currentReader = reader;
-        childDocs = DocSets.convert(reader, childFilter.getDocIdSet(reader));
-        DocIdSet docIdSet = parentFilter.getDocIdSet(reader);
+    public void setNextReader(AtomicReaderContext context) throws IOException {
+        collector.setNextReader(context);
+        // Can use null as acceptDocs here, since only live doc ids are pushed to the collect method.
+        DocIdSet docIdSet = parentFilter.getDocIdSet(context, null);
+        // In ES, if a parent is deleted its children are deleted too, so acceptDocs can also be null here.
+        childDocs = DocSets.convert(context.reader(), childFilter.getDocIdSet(context, null));
        if (docIdSet == null) {
            parentDocs = null;
        } else if (docIdSet instanceof FixedBitDocSet) {
@@ -98,7 +99,7 @@ public class NestedChildrenCollector extends FacetCollector {
        }
        int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
        for (int i = (parentDoc - 1); i > prevParentDoc; i--) {
-            if (!currentReader.isDeleted(i) && childDocs.get(i)) {
+            if (childDocs.get(i)) {
                collector.collect(i);
            }
        }
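Both changes in this file stem from the Lucene 4 Collector contract: setNextReader() now receives a per-segment AtomicReaderContext, and because scorers are built with the segment's liveDocs as acceptDocs, only live documents ever reach collect(), which is what makes the old isDeleted() guard redundant. The 4.0 contract, paraphrased as a sketch (not code from this commit):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.Scorer;

    public abstract class Collector {
        public abstract void setScorer(Scorer scorer) throws IOException;
        public abstract void collect(int doc) throws IOException;  // doc ids are segment-relative
        public abstract void setNextReader(AtomicReaderContext context) throws IOException;
        public abstract boolean acceptsDocsOutOfOrder();
    }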

org/elasticsearch/index/search/nested/NestedDocsFilter.java

@@ -19,11 +19,13 @@
package org.elasticsearch.index.search.nested;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.PrefixFilter;
+import org.apache.lucene.util.Bits;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
import java.io.IOException;
@@ -41,8 +43,8 @@ public class NestedDocsFilter extends Filter {
    }
    @Override
-    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-        return filter.getDocIdSet(reader);
+    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+        return filter.getDocIdSet(context, acceptDocs);
    }
@Override
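The filter migration is mechanical: Lucene 4's Filter.getDocIdSet() is per-segment and receives the acceptDocs it should honor. The contract, paraphrased as a sketch (not code from this commit):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.util.Bits;

    public abstract class Filter {
        // May return null to mean "no documents match in this segment".
        public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException;
    }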

org/elasticsearch/index/search/nested/NonNestedDocsFilter.java

@@ -19,11 +19,13 @@
package org.elasticsearch.index.search.nested;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.PrefixFilter;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
@@ -42,14 +44,14 @@ public class NonNestedDocsFilter extends Filter {
    }
    @Override
-    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-        DocIdSet docSet = filter.getDocIdSet(reader);
+    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+        DocIdSet docSet = filter.getDocIdSet(context, acceptDocs);
        if (docSet == null || docSet == DocIdSet.EMPTY_DOCIDSET) {
            // will almost never happen, and we need an OpenBitSet for the parent filter in
            // BlockJoinQuery, we cache it anyhow...
-            docSet = new FixedBitSet(reader.maxDoc());
+            docSet = new FixedBitSet(context.reader().maxDoc());
        }
-        ((FixedBitSet) docSet).flip(0, reader.maxDoc());
+        ((FixedBitSet) docSet).flip(0, context.reader().maxDoc());
        return docSet;
    }
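NonNestedDocsFilter materializes the complement of the nested-docs filter by flipping a FixedBitSet across the whole segment. Worth noting: the flip also sets bits for documents the wrapped filter excluded via acceptDocs (deleted docs included), so the result is only safe where consumers re-apply liveDocs downstream, as BlockJoinQuery's scorer does with its acceptDocs parameter. The pattern, condensed (sketch):

    FixedBitSet bits = new FixedBitSet(context.reader().maxDoc());
    // ... bits currently marks the nested docs ...
    bits.flip(0, context.reader().maxDoc());  // now marks everything else, deleted docs included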