From 826a6ab02a83147d98f08bebd4c92bf9088d3ff8 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 13 Dec 2012 22:59:41 +0100 Subject: [PATCH] Improved XBooleanFilter by adding drive logic for bit based filter impl and adding unit test, which tests all possible XBooleanFilter options. --- .../common/lucene/search/XBooleanFilter.java | 33 ++- .../lucene/search/XBooleanFilterTests.java | 240 ++++++++++++++++++ 2 files changed, 261 insertions(+), 12 deletions(-) create mode 100644 src/test/java/org/elasticsearch/test/unit/common/lucene/search/XBooleanFilterTests.java diff --git a/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java index 5369b3d1cde..77b5556ec21 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java @@ -165,15 +165,30 @@ public class XBooleanFilter extends Filter implements Iterable { continue; } if (clause.clause.getOccur() == Occur.SHOULD) { - // TODO: we should let res drive it, and check on all unset bits on it with Bits - DocIdSetIterator it = clause.docIdSet.iterator(); - if (it == null) { - continue; - } if (res == null) { + DocIdSetIterator it = clause.docIdSet.iterator(); + if (it == null) { + continue; + } res = new FixedBitSet(reader.maxDoc()); + res.or(it); + } else { + Bits bits = clause.bits; + // use the "res" to drive the iteration + int lastSetDoc = res.nextSetBit(0); + DocIdSetIterator it = res.iterator(); + for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) { + int diff = setDoc - lastSetDoc; + if (diff > 1) { + for (int unsetDoc = lastSetDoc + 1; unsetDoc < setDoc; unsetDoc++) { + if (bits.get(unsetDoc)) { + res.set(unsetDoc); + } + } + } + lastSetDoc = setDoc; + } } - res.or(it); } else if (clause.clause.getOccur() == Occur.MUST) { if (res == null) { // nothing we can do, just or it... @@ -217,12 +232,6 @@ public class XBooleanFilter extends Filter implements Iterable { return res; } - private static DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context, Bits acceptDocs) - throws IOException { - final DocIdSet set = filter.getDocIdSet(context, acceptDocs); - return (set == null || set == DocIdSet.EMPTY_DOCIDSET) ? null : set.iterator(); - } - /** * Adds a new FilterClause to the Boolean Filter container * diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/XBooleanFilterTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/XBooleanFilterTests.java new file mode 100644 index 00000000000..c4521071961 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/XBooleanFilterTests.java @@ -0,0 +1,240 @@ +package org.elasticsearch.test.unit.common.lucene.search; + +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.*; +import org.apache.lucene.queries.FilterClause; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.FieldCacheTermsFilter; +import org.apache.lucene.search.Filter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.FixedBitSet; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.search.TermFilter; +import org.elasticsearch.common.lucene.search.XBooleanFilter; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.apache.lucene.search.BooleanClause.Occur.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +/** + */ +public class XBooleanFilterTests { + + private Directory directory; + private AtomicReader reader; + + @BeforeClass + public void setup() throws Exception { + char[][] documentMatrix = new char[][] { + {'a', 'b', 'c', 'd'}, + {'a', 'b', 'c', 'd'}, + {'a', 'a', 'a', 'a'} + }; + + List documents = new ArrayList(documentMatrix.length); + for (char[] fields : documentMatrix) { + Document document = new Document(); + for (int i = 0; i < fields.length; i++) { + document.add(new StringField(Integer.toString(i), String.valueOf(fields[i]), Field.Store.NO)); + } + documents.add(document); + } + directory = new RAMDirectory(); + IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer())); + w.addDocuments(documents); + w.close(); + reader = new SlowCompositeReaderWrapper(DirectoryReader.open(directory)); + } + + @AfterClass + public void tearDown() throws Exception { + reader.close(); + directory.close(); + } + + @Test + public void testWithTwoClausesOfEachOccur_allFixedBitsetFilters() throws Exception { + List booleanFilters = new ArrayList(); + booleanFilters.add(createBooleanFilter( + newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false), + newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false), + newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false) + )); + booleanFilters.add(createBooleanFilter( + newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false), + newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false), + newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false) + )); + booleanFilters.add(createBooleanFilter( + newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false), + newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false), + newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false) + )); + + for (XBooleanFilter booleanFilter : booleanFilters) { + FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); + assertThat(result.cardinality(), equalTo(2)); + assertThat(result.get(0), equalTo(true)); + assertThat(result.get(1), equalTo(true)); + assertThat(result.get(2), equalTo(false)); + } + } + + @Test + public void testWithTwoClausesOfEachOccur_allBitsBasedFilters() throws Exception { + List booleanFilters = new ArrayList(); + booleanFilters.add(createBooleanFilter( + newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true), + newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true), + newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true) + )); + booleanFilters.add(createBooleanFilter( + newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true), + newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true), + newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true) + )); + booleanFilters.add(createBooleanFilter( + newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true), + newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true), + newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true) + )); + + for (XBooleanFilter booleanFilter : booleanFilters) { + FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); + assertThat(result.cardinality(), equalTo(2)); + assertThat(result.get(0), equalTo(true)); + assertThat(result.get(1), equalTo(true)); + assertThat(result.get(2), equalTo(false)); + } + } + + @Test + public void testWithTwoClausesOfEachOccur_allFilterTypes() throws Exception { + List booleanFilters = new ArrayList(); + booleanFilters.add(createBooleanFilter( + newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false), + newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false), + newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false) + )); + booleanFilters.add(createBooleanFilter( + newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false), + newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false), + newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false) + )); + booleanFilters.add(createBooleanFilter( + newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false), + newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false), + newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false) + )); + + for (XBooleanFilter booleanFilter : booleanFilters) { + FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); + assertThat(result.cardinality(), equalTo(2)); + assertThat(result.get(0), equalTo(true)); + assertThat(result.get(1), equalTo(true)); + assertThat(result.get(2), equalTo(false)); + } + + booleanFilters.clear(); + booleanFilters.add(createBooleanFilter( + newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true), + newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true), + newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true) + )); + booleanFilters.add(createBooleanFilter( + newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true), + newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true), + newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true) + )); + booleanFilters.add(createBooleanFilter( + newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true), + newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true), + newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true) + )); + + for (XBooleanFilter booleanFilter : booleanFilters) { + FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); + assertThat(result.cardinality(), equalTo(2)); + assertThat(result.get(0), equalTo(true)); + assertThat(result.get(1), equalTo(true)); + assertThat(result.get(2), equalTo(false)); + } + } + + @Test + public void testWithTwoClausesOfEachOccur_singleClauseOptimisation() throws Exception { + List booleanFilters = new ArrayList(); + booleanFilters.add(createBooleanFilter( + newFilterClause(1, 'b', MUST, true) + )); + + for (XBooleanFilter booleanFilter : booleanFilters) { + FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); + assertThat(result.cardinality(), equalTo(2)); + assertThat(result.get(0), equalTo(true)); + assertThat(result.get(1), equalTo(true)); + assertThat(result.get(2), equalTo(false)); + } + + booleanFilters.clear(); + booleanFilters.add(createBooleanFilter( + newFilterClause(1, 'c', MUST_NOT, true) + )); + for (XBooleanFilter booleanFilter : booleanFilters) { + FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); + assertThat(result.cardinality(), equalTo(3)); + assertThat(result.get(0), equalTo(true)); + assertThat(result.get(1), equalTo(true)); + assertThat(result.get(2), equalTo(true)); + } + + booleanFilters.clear(); + booleanFilters.add(createBooleanFilter( + newFilterClause(2, 'c', SHOULD, true) + )); + for (XBooleanFilter booleanFilter : booleanFilters) { + FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); + assertThat(result.cardinality(), equalTo(2)); + assertThat(result.get(0), equalTo(true)); + assertThat(result.get(1), equalTo(true)); + assertThat(result.get(2), equalTo(false)); + } + } + + private static FilterClause newFilterClause(int field, char character, BooleanClause.Occur occur, boolean slowerBitsBackedFilter) { + Filter filter; + if (slowerBitsBackedFilter) { + filter = new FieldCacheTermsFilter(String.valueOf(field), String.valueOf(character)); + } else { + Term term = new Term(String.valueOf(field), String.valueOf(character)); + filter = new TermFilter(term); + } + return new FilterClause(filter, occur); + } + + private static XBooleanFilter createBooleanFilter(FilterClause... clauses) { + XBooleanFilter booleanFilter = new XBooleanFilter(); + for (FilterClause clause : clauses) { + booleanFilter.add(clause); + } + return booleanFilter; + } + +}