From cdf1fc8981c99ed8af58a33bd2bba63fe192c26d Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Sun, 28 Oct 2012 17:04:10 +0100 Subject: [PATCH] lucene 4: upgraded o.e.index.search.nested package. Also fixed issue with liveDocs in child package. --- .../index/search/child/HasChildFilter.java | 2 +- .../index/search/child/HasParentFilter.java | 4 +- .../index/search/child/TopChildrenQuery.java | 2 +- .../index/search/nested/BlockJoinQuery.java | 201 +++++++++++------- .../search/nested/IncludeNestedDocsQuery.java | 43 ++-- .../nested/NestedChildrenCollector.java | 17 +- .../index/search/nested/NestedDocsFilter.java | 6 +- .../search/nested/NonNestedDocsFilter.java | 10 +- 8 files changed, 165 insertions(+), 120 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java b/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java index 2833fe70746..9f33e2d9614 100644 --- a/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java +++ b/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java @@ -178,7 +178,7 @@ public abstract class HasChildFilter extends Filter implements ScopePhase.Collec } public boolean get(int doc) { - return !acceptDocs.get(doc) && parents.contains(typeCache.idByDoc(doc)); + return acceptDocs.get(doc) && parents.contains(typeCache.idByDoc(doc)); } } diff --git a/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java b/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java index 57e2822c646..8cb758748c5 100644 --- a/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java +++ b/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java @@ -143,7 +143,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle } public boolean get(int doc) { - return !acceptDocs.get(doc) && parents.contains(idReaderTypeCache.parentIdByDoc(doc)); + return acceptDocs.get(doc) && 
parents.contains(idReaderTypeCache.parentIdByDoc(doc)); } } @@ -229,7 +229,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle } public boolean get(int doc) { - if (acceptDocs.get(doc) || doc == -1) { + if (!acceptDocs.get(doc) || doc == -1) { return false; } diff --git a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java index 7fe64c8a4bb..1b9634f4a51 100644 --- a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java +++ b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java @@ -138,7 +138,7 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase { for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) { AtomicReader indexReader = atomicReaderContext.reader(); int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId); - if (parentDocId != -1 && !indexReader.getLiveDocs().get(parentDocId)) { + if (parentDocId != -1 && indexReader.getLiveDocs().get(parentDocId)) { // we found a match, add it and break TIntObjectHashMap readerParentDocs = parentDocsPerReader.get(indexReader.getCoreCacheKey()); diff --git a/src/main/java/org/elasticsearch/index/search/nested/BlockJoinQuery.java b/src/main/java/org/elasticsearch/index/search/nested/BlockJoinQuery.java index d06e35883ba..579086993c2 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/BlockJoinQuery.java +++ b/src/main/java/org/elasticsearch/index/search/nested/BlockJoinQuery.java @@ -19,15 +19,20 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import 
org.elasticsearch.common.lucene.docset.FixedBitDocSet; import org.elasticsearch.common.lucene.search.NoopCollector; import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Locale; import java.util.Set; /** @@ -75,8 +80,6 @@ public class BlockJoinQuery extends Query { public static enum ScoreMode {None, Avg, Max, Total} - ; - private final Filter parentsFilter; private final Query childQuery; @@ -112,7 +115,7 @@ public class BlockJoinQuery extends Query { } @Override - public Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(IndexSearcher searcher) throws IOException { return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode, childCollector); } @@ -138,24 +141,19 @@ public class BlockJoinQuery extends Query { } @Override - public float getValue() { - return childWeight.getValue(); + public float getValueForNormalization() throws IOException { + return childWeight.getValueForNormalization() * joinQuery.getBoost() * joinQuery.getBoost(); } @Override - public float sumOfSquaredWeights() throws IOException { - return childWeight.sumOfSquaredWeights() * joinQuery.getBoost() * joinQuery.getBoost(); + public void normalize(float norm, float topLevelBoost) { + childWeight.normalize(norm, topLevelBoost * joinQuery.getBoost()); } @Override - public void normalize(float norm) { - childWeight.normalize(norm * joinQuery.getBoost()); - } - - @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { // Pass scoreDocsInOrder true, topScorer false to our sub: - final Scorer childScorer = childWeight.scorer(reader, true, false); + final Scorer childScorer = childWeight.scorer(context, true, false, null); if (childScorer == null) { // No matches @@ -168,7 +166,7 @@ 
public class BlockJoinQuery extends Query { return null; } - DocIdSet parents = parentsFilter.getDocIdSet(reader); + DocIdSet parents = parentsFilter.getDocIdSet(context, null); // TODO NESTED: We have random access in ES, not sure I understand what can be gain? // TODO: once we do random-access filters we can // generalize this: @@ -186,18 +184,22 @@ public class BlockJoinQuery extends Query { // CHANGE: if (childCollector != null) { - childCollector.setNextReader(reader, 0); + childCollector.setNextReader(context); childCollector.setScorer(childScorer); } - return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, childCollector); + return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, childCollector, acceptDocs); } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - // TODO - throw new UnsupportedOperationException(getClass().getName() + - " cannot explain match on parent document"); + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs()); + if (scorer != null) { + if (scorer.advance(doc) == doc) { + return scorer.explain(context.docBase); + } + } + return new ComplexExplanation(false, 0.0f, "Not a match"); } @Override @@ -210,21 +212,25 @@ public class BlockJoinQuery extends Query { private final Scorer childScorer; private final FixedBitSet parentBits; private final ScoreMode scoreMode; + private final Bits acceptDocs; // LUCENE 4 UPGRADE: Why not make the parentBits already be filtered by acceptDocs? 
private final Collector childCollector; private int parentDoc = -1; + private int prevParentDoc; private float parentScore; + private float parentFreq; private int nextChildDoc; private int[] pendingChildDocs = new int[5]; private float[] pendingChildScores; private int childDocUpto; - public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Collector childCollector) { + public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Collector childCollector, Bits acceptDocs) { super(weight); //System.out.println("Q.init firstChildDoc=" + firstChildDoc); this.parentBits = parentBits; this.childScorer = childScorer; this.scoreMode = scoreMode; + this.acceptDocs = acceptDocs; this.childCollector = childCollector; if (scoreMode != ScoreMode.None) { pendingChildScores = new float[5]; @@ -233,11 +239,8 @@ public class BlockJoinQuery extends Query { } @Override - public void visitSubScorers(Query parent, BooleanClause.Occur relationship, - ScorerVisitor visitor) { - super.visitSubScorers(parent, relationship, visitor); - //childScorer.visitSubScorers(weight.getQuery(), BooleanClause.Occur.MUST, visitor); - childScorer.visitScorers(visitor); + public Collection getChildren() { + return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN")); } int getChildCount() { @@ -271,64 +274,88 @@ public class BlockJoinQuery extends Query { public int nextDoc() throws IOException { //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc); - if (nextChildDoc == NO_MORE_DOCS) { - //System.out.println(" end"); - return parentDoc = NO_MORE_DOCS; - } + // Loop until we hit a parentDoc that's accepted + while (true) { + if (nextChildDoc == NO_MORE_DOCS) { + //System.out.println(" end"); + return parentDoc = NO_MORE_DOCS; + } - // Gather all children sharing the same parent as nextChildDoc - parentDoc = parentBits.nextSetBit(nextChildDoc); - 
//System.out.println(" parentDoc=" + parentDoc); - assert parentDoc != -1; + // Gather all children sharing the same parent as + // nextChildDoc - float totalScore = 0; - float maxScore = Float.NEGATIVE_INFINITY; + parentDoc = parentBits.nextSetBit(nextChildDoc); - childDocUpto = 0; - do { - //System.out.println(" c=" + nextChildDoc); - if (pendingChildDocs.length == childDocUpto) { - pendingChildDocs = ArrayUtil.grow(pendingChildDocs); - if (scoreMode != ScoreMode.None) { + //System.out.println(" parentDoc=" + parentDoc); + assert parentDoc != -1; + + //System.out.println(" nextChildDoc=" + nextChildDoc); + if (acceptDocs != null && !acceptDocs.get(parentDoc)) { + // Parent doc not accepted; skip child docs until + // we hit a new parent doc: + do { + nextChildDoc = childScorer.nextDoc(); + } while (nextChildDoc < parentDoc); + continue; + } + + float totalScore = 0; + float totalFreq = 0; + float maxScore = Float.NEGATIVE_INFINITY; + float maxFreq = 0; + + childDocUpto = 0; + do { + + //System.out.println(" c=" + nextChildDoc); + if (pendingChildDocs.length == childDocUpto) { + pendingChildDocs = ArrayUtil.grow(pendingChildDocs); + } + if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) { pendingChildScores = ArrayUtil.grow(pendingChildScores); } - } - pendingChildDocs[childDocUpto] = nextChildDoc; - if (scoreMode != ScoreMode.None) { - // TODO: specialize this into dedicated classes per-scoreMode - final float childScore = childScorer.score(); - pendingChildScores[childDocUpto] = childScore; - maxScore = Math.max(childScore, maxScore); - totalScore += childScore; + pendingChildDocs[childDocUpto] = nextChildDoc; + if (scoreMode != ScoreMode.None) { + // TODO: specialize this into dedicated classes per-scoreMode + final float childScore = childScorer.score(); + final float childFreq = childScorer.freq(); + pendingChildScores[childDocUpto] = childScore; + maxScore = Math.max(childScore, maxScore); + maxFreq = Math.max(childFreq, maxFreq); 
+ totalScore += childScore; + totalFreq += childFreq; + } + + // CHANGE: + childCollector.collect(nextChildDoc); + + childDocUpto++; + nextChildDoc = childScorer.nextDoc(); + } while (nextChildDoc < parentDoc); + + // Parent & child docs are supposed to be orthogonal: + assert nextChildDoc != parentDoc; + + switch(scoreMode) { + case Avg: + parentScore = totalScore / childDocUpto; + parentFreq = totalFreq / childDocUpto; + break; + case Max: + parentScore = maxScore; + parentFreq = maxFreq; + break; + case Total: + parentScore = totalScore; + parentFreq = totalFreq; + break; + case None: + break; } - // CHANGE: - childCollector.collect(nextChildDoc); - - childDocUpto++; - nextChildDoc = childScorer.nextDoc(); - } while (nextChildDoc < parentDoc); - //System.out.println(" nextChildDoc=" + nextChildDoc); - - // Parent & child docs are supposed to be orthogonal: - assert nextChildDoc != parentDoc; - - switch (scoreMode) { - case Avg: - parentScore = totalScore / childDocUpto; - break; - case Max: - parentScore = maxScore; - break; - case Total: - parentScore = totalScore; - break; - case None: - break; + //System.out.println(" return parentDoc=" + parentDoc); + return parentDoc; } - - //System.out.println(" return parentDoc=" + parentDoc); - return parentDoc; } @Override @@ -341,6 +368,11 @@ public class BlockJoinQuery extends Query { return parentScore; } + @Override + public float freq() throws IOException { + return parentFreq; + } + @Override public int advance(int parentTarget) throws IOException { @@ -359,7 +391,7 @@ public class BlockJoinQuery extends Query { return nextDoc(); } - final int prevParentDoc = parentBits.prevSetBit(parentTarget - 1); + prevParentDoc = parentBits.prevSetBit(parentTarget - 1); //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc); assert prevParentDoc >= parentDoc; @@ -377,6 +409,15 @@ public class BlockJoinQuery extends Query { //System.out.println(" return nextParentDoc=" + nd); 
return nd; } + + public Explanation explain(int docBase) throws IOException { + int start = docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc + int end = docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc + return new ComplexExplanation( + true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", start, end) + ); + } + } @Override @@ -427,8 +468,8 @@ public class BlockJoinQuery extends Query { } @Override - public Object clone() { - return new BlockJoinQuery((Query) origChildQuery.clone(), + public Query clone() { + return new BlockJoinQuery(origChildQuery.clone(), parentsFilter, scoreMode).setCollector(childCollector); } diff --git a/src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java b/src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java index f94c58648bf..ce7d7951e88 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java +++ b/src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java @@ -1,12 +1,15 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.lucene.docset.FixedBitDocSet; import java.io.IOException; +import java.util.Collection; import java.util.Set; /** @@ -48,7 +51,7 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(IndexSearcher searcher) throws IOException { return new IncludeNestedDocsWeight(parentQuery, parentQuery.createWeight(searcher), parentFilter); } @@ -70,30 +73,25 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public float getValue() { - return 
parentWeight.getValue(); + public void normalize(float norm, float topLevelBoost) { + parentWeight.normalize(norm, topLevelBoost); } @Override - public float sumOfSquaredWeights() throws IOException { - return parentWeight.sumOfSquaredWeights() * parentQuery.getBoost() * parentQuery.getBoost(); + public float getValueForNormalization() throws IOException { + return parentWeight.getValueForNormalization(); // this query is never boosted so just delegate... } @Override - public void normalize(float norm) { - parentWeight.normalize(norm * parentQuery.getBoost()); - } - - @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - final Scorer parentScorer = parentWeight.scorer(reader, true, false); + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { + final Scorer parentScorer = parentWeight.scorer(context, true, false, acceptDocs); // no matches if (parentScorer == null) { return null; } - DocIdSet parents = parentsFilter.getDocIdSet(reader); + DocIdSet parents = parentsFilter.getDocIdSet(context, acceptDocs); if (parents == null) { // No matches return null; @@ -114,8 +112,8 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - return null; + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + return null; //Query is used internally and not by users, so explain can be empty } @Override @@ -154,12 +152,10 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public void visitSubScorers(Query parent, BooleanClause.Occur relationship, ScorerVisitor visitor) { - super.visitSubScorers(parent, relationship, visitor); - parentScorer.visitScorers(visitor); + public Collection getChildren() { + return parentScorer.getChildren(); } - @Override public int nextDoc() throws IOException { if 
(currentParentPointer == NO_MORE_DOCS) { return (currentDoc = NO_MORE_DOCS); @@ -187,7 +183,6 @@ public class IncludeNestedDocsQuery extends Query { return currentDoc; } - @Override public int advance(int target) throws IOException { if (target == NO_MORE_DOCS) { return (currentDoc = NO_MORE_DOCS); @@ -224,6 +219,10 @@ public class IncludeNestedDocsQuery extends Query { return parentScorer.score(); } + public float freq() throws IOException { + return parentScorer.freq(); + } + public int docID() { return currentDoc; } @@ -269,8 +268,8 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public Object clone() { - Query clonedQuery = (Query) origParentQuery.clone(); + public Query clone() { + Query clonedQuery = origParentQuery.clone(); return new IncludeNestedDocsQuery(clonedQuery, this); } } diff --git a/src/main/java/org/elasticsearch/index/search/nested/NestedChildrenCollector.java b/src/main/java/org/elasticsearch/index/search/nested/NestedChildrenCollector.java index 99752f6e51c..1c7c8988514 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/NestedChildrenCollector.java +++ b/src/main/java/org/elasticsearch/index/search/nested/NestedChildrenCollector.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; @@ -47,8 +49,6 @@ public class NestedChildrenCollector extends FacetCollector { private FixedBitSet parentDocs; - private IndexReader currentReader; - public NestedChildrenCollector(FacetCollector collector, Filter parentFilter, Filter childFilter) { this.collector = collector; this.parentFilter = parentFilter; @@ -72,11 +72,12 @@ public class NestedChildrenCollector extends FacetCollector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - 
collector.setNextReader(reader, docBase); - currentReader = reader; - childDocs = DocSets.convert(reader, childFilter.getDocIdSet(reader)); - DocIdSet docIdSet = parentFilter.getDocIdSet(reader); + public void setNextReader(AtomicReaderContext context) throws IOException { + collector.setNextReader(context); + // Can use null as acceptedDocs here, since only live doc ids are being pushed to collect method. + DocIdSet docIdSet = parentFilter.getDocIdSet(context, null); + // In ES, if a parent is deleted then its children are deleted as well. Therefore acceptedDocs can also be null here. + childDocs = DocSets.convert(context.reader(), childFilter.getDocIdSet(context, null)); if (docIdSet == null) { parentDocs = null; } else if (docIdSet instanceof FixedBitDocSet) { @@ -98,7 +99,7 @@ public class NestedChildrenCollector extends FacetCollector { } int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1); for (int i = (parentDoc - 1); i > prevParentDoc; i--) { - if (!currentReader.isDeleted(i) && childDocs.get(i)) { + if (childDocs.get(i)) { collector.collect(i); } } diff --git a/src/main/java/org/elasticsearch/index/search/nested/NestedDocsFilter.java b/src/main/java/org/elasticsearch/index/search/nested/NestedDocsFilter.java index 872f04abb80..5940b40974d 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/NestedDocsFilter.java +++ b/src/main/java/org/elasticsearch/index/search/nested/NestedDocsFilter.java @@ -19,11 +19,13 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.search.PrefixFilter; +import org.apache.lucene.util.Bits; import org.elasticsearch.index.mapper.internal.TypeFieldMapper; import java.io.IOException; @@ -41,8 +43,8 @@ public class NestedDocsFilter extends Filter { } @Override - public DocIdSet 
getDocIdSet(IndexReader reader) throws IOException { - return filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + return filter.getDocIdSet(context, acceptDocs); } @Override diff --git a/src/main/java/org/elasticsearch/index/search/nested/NonNestedDocsFilter.java b/src/main/java/org/elasticsearch/index/search/nested/NonNestedDocsFilter.java index ac833b743fc..7df2f3e8795 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/NonNestedDocsFilter.java +++ b/src/main/java/org/elasticsearch/index/search/nested/NonNestedDocsFilter.java @@ -19,11 +19,13 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.search.PrefixFilter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.index.mapper.internal.TypeFieldMapper; @@ -42,14 +44,14 @@ public class NonNestedDocsFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - DocIdSet docSet = filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + DocIdSet docSet = filter.getDocIdSet(context, acceptDocs); if (docSet == null || docSet == DocIdSet.EMPTY_DOCIDSET) { // will almost never happen, and we need an OpenBitSet for the parent filter in // BlockJoinQuery, we cache it anyhow... - docSet = new FixedBitSet(reader.maxDoc()); + docSet = new FixedBitSet(context.reader().maxDoc()); } - ((FixedBitSet) docSet).flip(0, reader.maxDoc()); + ((FixedBitSet) docSet).flip(0, context.reader().maxDoc()); return docSet; }