From 32bf7607c79da56a6367cc610622bfc4c80df246 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Wed, 12 Dec 2012 20:12:51 -0800 Subject: [PATCH] optimize boolean filter to use bits driven by result bitset --- .../common/lucene/search/XBooleanFilter.java | 168 +++++++++++++++--- 1 file changed, 139 insertions(+), 29 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java index 2d9972ca867..5369b3d1cde 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java @@ -67,55 +67,153 @@ public class XBooleanFilter extends Filter implements Iterable { } } // SHOULD or MUST, just return the set... + if (DocIdSets.isEmpty(set)) { + return null; + } return set; } + // first, go over and see if we can shortcut the execution + // and gather Bits if we need to + List results = new ArrayList(clauses.size()); boolean hasShouldClauses = false; - for (final FilterClause fc : clauses) { - if (fc.getOccur() == Occur.SHOULD) { + boolean hasNonEmptyShouldClause = false; + for (int i = 0; i < clauses.size(); i++) { + FilterClause clause = clauses.get(i); + DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs); + if (clause.getOccur() == Occur.MUST) { + if (DocIdSets.isEmpty(set)) { + return null; + } + } else if (clause.getOccur() == Occur.SHOULD) { hasShouldClauses = true; - final DocIdSetIterator disi = getDISI(fc.getFilter(), context, acceptDocs); - if (disi == null) continue; - if (res == null) { - res = new FixedBitSet(reader.maxDoc()); + if (DocIdSets.isEmpty(set)) { + continue; + } + hasNonEmptyShouldClause = true; + } else if (clause.getOccur() == Occur.MUST_NOT) { + if (DocIdSets.isEmpty(set)) { + // we mark empty ones as null for must_not, handle it in the next run... 
+ results.add(new ResultClause(null, null, clause)); + continue; } - res.or(disi); } + Bits bits = null; + if (!DocIdSets.isFastIterator(set)) { + bits = set.bits(); + } + results.add(new ResultClause(set, bits, clause)); } - if (hasShouldClauses && res == null) + + if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; - - for (final FilterClause fc : clauses) { - if (fc.getOccur() == Occur.MUST_NOT) { - if (res == null) { - assert !hasShouldClauses; - res = new FixedBitSet(reader.maxDoc()); - res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs - } - final DocIdSetIterator disi = getDISI(fc.getFilter(), context, acceptDocs); - if (disi != null) { - res.andNot(disi); - } - } } - for (final FilterClause fc : clauses) { - if (fc.getOccur() == Occur.MUST) { - final DocIdSetIterator disi = getDISI(fc.getFilter(), context, acceptDocs); - if (disi == null) { + // now, go over the clauses and apply the "fast" ones... + boolean hasBits = false; + for (int i = 0; i < results.size(); i++) { + ResultClause clause = results.get(i); + // we apply bits in based ones (slow) in the second run + if (clause.bits != null) { + hasBits = true; + continue; + } + if (clause.clause.getOccur() == Occur.SHOULD) { + DocIdSetIterator it = clause.docIdSet.iterator(); + if (it == null) { + continue; + } + if (res == null) { + res = new FixedBitSet(reader.maxDoc()); + } + res.or(it); + } else if (clause.clause.getOccur() == Occur.MUST) { + DocIdSetIterator it = clause.docIdSet.iterator(); + if (it == null) { return null; } if (res == null) { res = new FixedBitSet(reader.maxDoc()); - res.or(disi); + res.or(it); } else { - res.and(disi); + res.and(it); + } + } else if (clause.clause.getOccur() == Occur.MUST_NOT) { + if (res == null) { + res = new FixedBitSet(reader.maxDoc()); + res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs + } + if (clause.docIdSet != null) { + DocIdSetIterator it = clause.docIdSet.iterator(); + if (it != null) { + res.andNot(it); + } 
+ } + } + } + + if (!hasBits) { + return res; + } + + // we have some clauses with bits, apply them... + // we let the "res" drive the computation, and check Bits for that + for (int i = 0; i < results.size(); i++) { + ResultClause clause = results.get(i); + // we apply bits in based ones (slow) in the second run + if (clause.bits == null) { + continue; + } + if (clause.clause.getOccur() == Occur.SHOULD) { + // TODO: we should let res drive it, and check on all unset bits on it with Bits + DocIdSetIterator it = clause.docIdSet.iterator(); + if (it == null) { + continue; + } + if (res == null) { + res = new FixedBitSet(reader.maxDoc()); + } + res.or(it); + } else if (clause.clause.getOccur() == Occur.MUST) { + if (res == null) { + // nothing we can do, just or it... + res = new FixedBitSet(reader.maxDoc()); + DocIdSetIterator it = clause.docIdSet.iterator(); + if (it == null) { + return null; + } + res.or(it); + } else { + Bits bits = clause.bits; + // use the "res" to drive the iteration + DocIdSetIterator it = res.iterator(); + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + if (!bits.get(doc)) { + res.clear(doc); + } + } + } + } else if (clause.clause.getOccur() == Occur.MUST_NOT) { + if (res == null) { + res = new FixedBitSet(reader.maxDoc()); + res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs + DocIdSetIterator it = clause.docIdSet.iterator(); + if (it != null) { + res.andNot(it); + } + } else { + Bits bits = clause.bits; + // let res drive the iteration + DocIdSetIterator it = res.iterator(); + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + if (bits.get(doc)) { + res.clear(doc); + } + } } } } - // don't wrap, based on our own strategy of doing the wrapping on the filtered query level - //return res != null ? 
+ BitsFilteredDocIdSet.wrap(res, acceptDocs) : DocIdSet.EMPTY_DOCIDSET; return res; } @@ -188,4 +286,16 @@ public class XBooleanFilter extends Filter implements Iterable { } return buffer.append(')').toString(); } + + /** Holder with final fields for what the first pass over the filter clauses gathered for a single clause; entries whose bits field is non-null are skipped by the iterator-based pass and applied in the later Bits-based pass. */ static class ResultClause { + /** Set returned by the clause's filter; null when a MUST_NOT clause produced an empty set. */ public final DocIdSet docIdSet; + /** Value of docIdSet.bits() when DocIdSets.isFastIterator(docIdSet) is false; null otherwise (and null for the empty MUST_NOT case). */ public final Bits bits; + /** The filter clause this result was computed for; its getOccur() decides how the result is combined. */ public final FilterClause clause; + + /** Stores the three references as given; performs no validation or copying. */ ResultClause(DocIdSet docIdSet, Bits bits, FilterClause clause) { + this.docIdSet = docIdSet; + this.bits = bits; + this.clause = clause; + } + } }