Fixed bug in bool filter where it doesn't emit docs as hits while they are hits.

There is an optimization that executes bit based (slow) filters in the end. Matched docs could be unset if they didn't match with any of these filters. The bug was that also iterator based (fast) filters should be checked.
This change checks all should filters in the end part (if must or must_not clauses exists), so it can now correctly unset matched docs. The current bool filters requires that at least one should clause must match for docs to be match regardless of any other clauses.

Closes #4130
This commit is contained in:
Martijn van Groningen 2013-11-10 17:33:09 +01:00
parent c3e53f3889
commit 8e0291823a
2 changed files with 135 additions and 23 deletions

View File

@ -31,9 +31,7 @@ import org.elasticsearch.common.lucene.docset.DocIdSets;
import org.elasticsearch.common.lucene.docset.NotDocIdSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.*;
/**
* Similar to {@link org.apache.lucene.queries.BooleanFilter}.
@ -118,6 +116,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
boolean hasBits = false;
// But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs
// that don't match with a must or must_not clause.
List<ResultClause> fastOrClauses = new ArrayList<ResultClause>();
for (int i = 0; i < results.size(); i++) {
ResultClause clause = results.get(i);
// we apply bits in based ones (slow) in the second run
@ -126,15 +125,22 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
continue;
}
if (clause.clause.getOccur() == Occur.SHOULD) {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it == null) {
continue;
if (hasMustClauses || hasMustNotClauses) {
fastOrClauses.add(clause);
} else if (res == null) {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it != null) {
hasNonEmptyShouldClause = true;
res = new FixedBitSet(reader.maxDoc());
res.or(it);
}
} else {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it != null) {
hasNonEmptyShouldClause = true;
res.or(it);
}
}
hasNonEmptyShouldClause = true;
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
}
res.or(it);
}
}
@ -172,6 +178,24 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
}
if (!hasBits) {
if (!fastOrClauses.isEmpty()) {
DocIdSetIterator it = res.iterator();
at_least_one_should_clause_iter:
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
for (ResultClause fastOrClause : fastOrClauses) {
DocIdSetIterator clauseIterator = fastOrClause.iterator();
if (clauseIterator == null) {
continue;
}
if (iteratorMatch(clauseIterator, setDoc)) {
hasNonEmptyShouldClause = true;
continue at_least_one_should_clause_iter;
}
}
res.clear(setDoc);
}
}
if (hasShouldClauses && !hasNonEmptyShouldClause) {
return null;
} else {
@ -181,7 +205,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
// we have some clauses with bits, apply them...
// we let the "res" drive the computation, and check Bits for that
List<ResultClause> orClauses = new ArrayList<ResultClause>();
List<ResultClause> slowOrClauses = new ArrayList<ResultClause>();
for (int i = 0; i < results.size(); i++) {
ResultClause clause = results.get(i);
if (clause.bits == null) {
@ -189,7 +213,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
}
if (clause.clause.getOccur() == Occur.SHOULD) {
if (hasMustClauses || hasMustNotClauses) {
orClauses.add(clause);
slowOrClauses.add(clause);
} else {
if (res == null) {
DocIdSetIterator it = clause.docIdSet.iterator();
@ -199,7 +223,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
hasNonEmptyShouldClause = true;
res = new FixedBitSet(reader.maxDoc());
res.or(it);
} else if (!hasMustNotClauses && !hasMustClauses) {
} else {
for (int doc = 0; doc < reader.maxDoc(); doc++) {
if (!res.get(doc) && clause.bits.get(doc)) {
hasNonEmptyShouldClause = true;
@ -249,16 +273,27 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
}
// From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there
// is already a must or must_not clause. However in the current ES bool filter behaviour all should clauses
// must match in order for a doc to be a match. What we do here is checking if matched docs match with any
// should filter. TODO: Add an option to have disable minimum_should_match=1 behaviour
if (!orClauses.isEmpty()) {
// is already a must or must_not clause. However in the current ES bool filter behaviour at least one should
// clause must match in order for a doc to be a match. What we do here is checking if matched docs match with
// any should filter. TODO: Add an option to have disable minimum_should_match=1 behaviour
if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
DocIdSetIterator it = res.iterator();
res: for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
for (ResultClause orClause : orClauses) {
if (orClause.bits.get(setDoc)) {
at_least_one_should_clause_iter:
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
for (ResultClause fastOrClause : fastOrClauses) {
DocIdSetIterator clauseIterator = fastOrClause.iterator();
if (it == null) {
continue;
}
if (iteratorMatch(clauseIterator, setDoc)) {
hasNonEmptyShouldClause = true;
continue res;
continue at_least_one_should_clause_iter;
}
}
for (ResultClause slowOrClause : slowOrClauses) {
if (slowOrClause.bits.get(setDoc)) {
hasNonEmptyShouldClause = true;
continue at_least_one_should_clause_iter;
}
}
res.clear(setDoc);
@ -338,14 +373,44 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
}
static class ResultClause {
public final DocIdSet docIdSet;
public final Bits bits;
public final FilterClause clause;
DocIdSetIterator docIdSetIterator;
ResultClause(DocIdSet docIdSet, Bits bits, FilterClause clause) {
this.docIdSet = docIdSet;
this.bits = bits;
this.clause = clause;
}
/**
* @return An iterator, but caches it for subsequent usage. Don't use if iterator is consumed in one invocation.
*/
DocIdSetIterator iterator() throws IOException {
if (docIdSetIterator != null) {
return docIdSetIterator;
} else {
return docIdSetIterator = docIdSet.iterator();
}
}
}
static boolean iteratorMatch(DocIdSetIterator docIdSetIterator, int target) throws IOException {
assert docIdSetIterator != null;
int current = docIdSetIterator.docID();
if (current == DocIdSetIterator.NO_MORE_DOCS || target < current) {
return false;
} else {
if (current == target) {
return true;
} else {
return docIdSetIterator.advance(target) == target;
}
}
}
}

View File

@ -345,6 +345,54 @@ public class XBooleanFilterTests extends ElasticsearchLuceneTestCase {
assertThat(result.get(2), equalTo(true));
}
@Test
// See issue: https://github.com/elasticsearch/elasticsearch/issues/4130
public void testOneFastMustNotOneFastShouldAndOneSlowShould() throws Exception {
XBooleanFilter booleanFilter = createBooleanFilter(
newFilterClause(4, 'v', MUST_NOT, false),
newFilterClause(4, 'z', SHOULD, false),
newFilterClause(4, 'x', SHOULD, true)
);
FixedBitSet result = new FixedBitSet(reader.maxDoc());
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(2));
assertThat(result.get(0), equalTo(false));
assertThat(result.get(1), equalTo(true));
assertThat(result.get(2), equalTo(true));
}
@Test
public void testOneFastShouldClauseAndOneSlowShouldClause() throws Exception {
XBooleanFilter booleanFilter = createBooleanFilter(
newFilterClause(4, 'z', SHOULD, false),
newFilterClause(4, 'x', SHOULD, true)
);
FixedBitSet result = new FixedBitSet(reader.maxDoc());
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(2));
assertThat(result.get(0), equalTo(false));
assertThat(result.get(1), equalTo(true));
assertThat(result.get(2), equalTo(true));
}
@Test
public void testOneMustClauseOneFastShouldClauseAndOneSlowShouldClause() throws Exception {
XBooleanFilter booleanFilter = createBooleanFilter(
newFilterClause(0, 'a', MUST, false),
newFilterClause(4, 'z', SHOULD, false),
newFilterClause(4, 'x', SHOULD, true)
);
FixedBitSet result = new FixedBitSet(reader.maxDoc());
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(2));
assertThat(result.get(0), equalTo(false));
assertThat(result.get(1), equalTo(true));
assertThat(result.get(2), equalTo(true));
}
private static FilterClause newFilterClause(int field, char character, BooleanClause.Occur occur, boolean slowerBitsBackedFilter) {
Filter filter;
if (slowerBitsBackedFilter) {
@ -365,7 +413,6 @@ public class XBooleanFilterTests extends ElasticsearchLuceneTestCase {
}
@Test
@AwaitsFix(bugUrl = "https://github.com/elasticsearch/elasticsearch/pull/4144/")
public void testRandom() throws IOException {
int iterations = atLeast(400); // don't worry that is fast!
for (int iter = 0; iter < iterations; iter++) {