Improved XBooleanFilter by adding drive logic for bit based filter impl and adding unit test, which tests all possible XBooleanFilter options.
This commit is contained in:
parent
8f5bca43fb
commit
826a6ab02a
|
@ -165,15 +165,30 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (clause.clause.getOccur() == Occur.SHOULD) {
|
if (clause.clause.getOccur() == Occur.SHOULD) {
|
||||||
// TODO: we should let res drive it, and check on all unset bits on it with Bits
|
if (res == null) {
|
||||||
DocIdSetIterator it = clause.docIdSet.iterator();
|
DocIdSetIterator it = clause.docIdSet.iterator();
|
||||||
if (it == null) {
|
if (it == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (res == null) {
|
|
||||||
res = new FixedBitSet(reader.maxDoc());
|
res = new FixedBitSet(reader.maxDoc());
|
||||||
}
|
|
||||||
res.or(it);
|
res.or(it);
|
||||||
|
} else {
|
||||||
|
Bits bits = clause.bits;
|
||||||
|
// use the "res" to drive the iteration
|
||||||
|
int lastSetDoc = res.nextSetBit(0);
|
||||||
|
DocIdSetIterator it = res.iterator();
|
||||||
|
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
|
||||||
|
int diff = setDoc - lastSetDoc;
|
||||||
|
if (diff > 1) {
|
||||||
|
for (int unsetDoc = lastSetDoc + 1; unsetDoc < setDoc; unsetDoc++) {
|
||||||
|
if (bits.get(unsetDoc)) {
|
||||||
|
res.set(unsetDoc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastSetDoc = setDoc;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if (clause.clause.getOccur() == Occur.MUST) {
|
} else if (clause.clause.getOccur() == Occur.MUST) {
|
||||||
if (res == null) {
|
if (res == null) {
|
||||||
// nothing we can do, just or it...
|
// nothing we can do, just or it...
|
||||||
|
@ -217,12 +232,6 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context, Bits acceptDocs)
|
|
||||||
throws IOException {
|
|
||||||
final DocIdSet set = filter.getDocIdSet(context, acceptDocs);
|
|
||||||
return (set == null || set == DocIdSet.EMPTY_DOCIDSET) ? null : set.iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a new FilterClause to the Boolean Filter container
|
* Adds a new FilterClause to the Boolean Filter container
|
||||||
*
|
*
|
||||||
|
|
|
@ -0,0 +1,240 @@
|
||||||
|
package org.elasticsearch.test.unit.common.lucene.search;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.*;
|
||||||
|
import org.apache.lucene.queries.FilterClause;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.FieldCacheTermsFilter;
|
||||||
|
import org.apache.lucene.search.Filter;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
|
import org.elasticsearch.common.lucene.search.TermFilter;
|
||||||
|
import org.elasticsearch.common.lucene.search.XBooleanFilter;
|
||||||
|
import org.testng.annotations.AfterClass;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.apache.lucene.search.BooleanClause.Occur.*;
|
||||||
|
import static org.hamcrest.MatcherAssert.assertThat;
|
||||||
|
import static org.hamcrest.core.IsEqual.equalTo;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
public class XBooleanFilterTests {
|
||||||
|
|
||||||
|
private Directory directory;
|
||||||
|
private AtomicReader reader;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void setup() throws Exception {
|
||||||
|
char[][] documentMatrix = new char[][] {
|
||||||
|
{'a', 'b', 'c', 'd'},
|
||||||
|
{'a', 'b', 'c', 'd'},
|
||||||
|
{'a', 'a', 'a', 'a'}
|
||||||
|
};
|
||||||
|
|
||||||
|
List<Document> documents = new ArrayList<Document>(documentMatrix.length);
|
||||||
|
for (char[] fields : documentMatrix) {
|
||||||
|
Document document = new Document();
|
||||||
|
for (int i = 0; i < fields.length; i++) {
|
||||||
|
document.add(new StringField(Integer.toString(i), String.valueOf(fields[i]), Field.Store.NO));
|
||||||
|
}
|
||||||
|
documents.add(document);
|
||||||
|
}
|
||||||
|
directory = new RAMDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer()));
|
||||||
|
w.addDocuments(documents);
|
||||||
|
w.close();
|
||||||
|
reader = new SlowCompositeReaderWrapper(DirectoryReader.open(directory));
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
reader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithTwoClausesOfEachOccur_allFixedBitsetFilters() throws Exception {
|
||||||
|
List<XBooleanFilter> booleanFilters = new ArrayList<XBooleanFilter>();
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false),
|
||||||
|
newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false),
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false)
|
||||||
|
));
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false),
|
||||||
|
newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false),
|
||||||
|
newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false)
|
||||||
|
));
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false),
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false),
|
||||||
|
newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false)
|
||||||
|
));
|
||||||
|
|
||||||
|
for (XBooleanFilter booleanFilter : booleanFilters) {
|
||||||
|
FixedBitSet result = new FixedBitSet(reader.maxDoc());
|
||||||
|
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
|
||||||
|
assertThat(result.cardinality(), equalTo(2));
|
||||||
|
assertThat(result.get(0), equalTo(true));
|
||||||
|
assertThat(result.get(1), equalTo(true));
|
||||||
|
assertThat(result.get(2), equalTo(false));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithTwoClausesOfEachOccur_allBitsBasedFilters() throws Exception {
|
||||||
|
List<XBooleanFilter> booleanFilters = new ArrayList<XBooleanFilter>();
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true),
|
||||||
|
newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true),
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true)
|
||||||
|
));
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true),
|
||||||
|
newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true),
|
||||||
|
newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true)
|
||||||
|
));
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true),
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true),
|
||||||
|
newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true)
|
||||||
|
));
|
||||||
|
|
||||||
|
for (XBooleanFilter booleanFilter : booleanFilters) {
|
||||||
|
FixedBitSet result = new FixedBitSet(reader.maxDoc());
|
||||||
|
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
|
||||||
|
assertThat(result.cardinality(), equalTo(2));
|
||||||
|
assertThat(result.get(0), equalTo(true));
|
||||||
|
assertThat(result.get(1), equalTo(true));
|
||||||
|
assertThat(result.get(2), equalTo(false));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithTwoClausesOfEachOccur_allFilterTypes() throws Exception {
|
||||||
|
List<XBooleanFilter> booleanFilters = new ArrayList<XBooleanFilter>();
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false),
|
||||||
|
newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false),
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false)
|
||||||
|
));
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false),
|
||||||
|
newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false),
|
||||||
|
newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false)
|
||||||
|
));
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false),
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false),
|
||||||
|
newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false)
|
||||||
|
));
|
||||||
|
|
||||||
|
for (XBooleanFilter booleanFilter : booleanFilters) {
|
||||||
|
FixedBitSet result = new FixedBitSet(reader.maxDoc());
|
||||||
|
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
|
||||||
|
assertThat(result.cardinality(), equalTo(2));
|
||||||
|
assertThat(result.get(0), equalTo(true));
|
||||||
|
assertThat(result.get(1), equalTo(true));
|
||||||
|
assertThat(result.get(2), equalTo(false));
|
||||||
|
}
|
||||||
|
|
||||||
|
booleanFilters.clear();
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true),
|
||||||
|
newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true),
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true)
|
||||||
|
));
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true),
|
||||||
|
newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true),
|
||||||
|
newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true)
|
||||||
|
));
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true),
|
||||||
|
newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true),
|
||||||
|
newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true)
|
||||||
|
));
|
||||||
|
|
||||||
|
for (XBooleanFilter booleanFilter : booleanFilters) {
|
||||||
|
FixedBitSet result = new FixedBitSet(reader.maxDoc());
|
||||||
|
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
|
||||||
|
assertThat(result.cardinality(), equalTo(2));
|
||||||
|
assertThat(result.get(0), equalTo(true));
|
||||||
|
assertThat(result.get(1), equalTo(true));
|
||||||
|
assertThat(result.get(2), equalTo(false));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithTwoClausesOfEachOccur_singleClauseOptimisation() throws Exception {
|
||||||
|
List<XBooleanFilter> booleanFilters = new ArrayList<XBooleanFilter>();
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(1, 'b', MUST, true)
|
||||||
|
));
|
||||||
|
|
||||||
|
for (XBooleanFilter booleanFilter : booleanFilters) {
|
||||||
|
FixedBitSet result = new FixedBitSet(reader.maxDoc());
|
||||||
|
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
|
||||||
|
assertThat(result.cardinality(), equalTo(2));
|
||||||
|
assertThat(result.get(0), equalTo(true));
|
||||||
|
assertThat(result.get(1), equalTo(true));
|
||||||
|
assertThat(result.get(2), equalTo(false));
|
||||||
|
}
|
||||||
|
|
||||||
|
booleanFilters.clear();
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(1, 'c', MUST_NOT, true)
|
||||||
|
));
|
||||||
|
for (XBooleanFilter booleanFilter : booleanFilters) {
|
||||||
|
FixedBitSet result = new FixedBitSet(reader.maxDoc());
|
||||||
|
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
|
||||||
|
assertThat(result.cardinality(), equalTo(3));
|
||||||
|
assertThat(result.get(0), equalTo(true));
|
||||||
|
assertThat(result.get(1), equalTo(true));
|
||||||
|
assertThat(result.get(2), equalTo(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
booleanFilters.clear();
|
||||||
|
booleanFilters.add(createBooleanFilter(
|
||||||
|
newFilterClause(2, 'c', SHOULD, true)
|
||||||
|
));
|
||||||
|
for (XBooleanFilter booleanFilter : booleanFilters) {
|
||||||
|
FixedBitSet result = new FixedBitSet(reader.maxDoc());
|
||||||
|
result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
|
||||||
|
assertThat(result.cardinality(), equalTo(2));
|
||||||
|
assertThat(result.get(0), equalTo(true));
|
||||||
|
assertThat(result.get(1), equalTo(true));
|
||||||
|
assertThat(result.get(2), equalTo(false));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static FilterClause newFilterClause(int field, char character, BooleanClause.Occur occur, boolean slowerBitsBackedFilter) {
|
||||||
|
Filter filter;
|
||||||
|
if (slowerBitsBackedFilter) {
|
||||||
|
filter = new FieldCacheTermsFilter(String.valueOf(field), String.valueOf(character));
|
||||||
|
} else {
|
||||||
|
Term term = new Term(String.valueOf(field), String.valueOf(character));
|
||||||
|
filter = new TermFilter(term);
|
||||||
|
}
|
||||||
|
return new FilterClause(filter, occur);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static XBooleanFilter createBooleanFilter(FilterClause... clauses) {
|
||||||
|
XBooleanFilter booleanFilter = new XBooleanFilter();
|
||||||
|
for (FilterClause clause : clauses) {
|
||||||
|
booleanFilter.add(clause);
|
||||||
|
}
|
||||||
|
return booleanFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue