improve boolean filter performance

This commit is contained in:
kimchy 2010-09-24 00:59:26 +02:00
parent 31fd196687
commit 9bb86ea865
9 changed files with 241 additions and 49 deletions

View File

@ -1,40 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search;
import java.util.List;
/**
 * A {@link BooleanFilter} subclass that makes the inherited clause lists
 * readable through public accessors.
 *
 * <p>Each accessor hands back the inherited field as-is (no copy is made),
 * so a list is {@code null} when the field itself is {@code null}.
 *
 * @author kimchy (shay.banon)
 */
public class PublicBooleanFilter extends BooleanFilter {

    /**
     * @return the inherited {@code shouldFilters} list, unchanged
     */
    public List<Filter> getShouldFilters() {
        return shouldFilters;
    }

    /**
     * @return the inherited {@code mustFilters} list, unchanged
     */
    public List<Filter> getMustFilters() {
        return mustFilters;
    }

    /**
     * @return the inherited {@code notFilters} list, unchanged
     */
    public List<Filter> getNotFilters() {
        return notFilters;
    }
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import java.io.IOException;
@ -123,8 +124,8 @@ public class CustomFieldQuery extends FieldQuery {
// ignore
}
}
} else if (sourceFilter instanceof PublicBooleanFilter) {
PublicBooleanFilter booleanFilter = (PublicBooleanFilter) sourceFilter;
} else if (sourceFilter instanceof XBooleanFilter) {
XBooleanFilter booleanFilter = (XBooleanFilter) sourceFilter;
for (Filter filter : booleanFilter.getMustFilters()) {
flatten(filter, flatQueries);
}

View File

@ -44,6 +44,10 @@ public class OpenBitDocSet extends DocSet {
this.set = new OpenBitSetDISI(disi, numBits);
}
/**
 * Gives direct access to the bit set held by this doc set. The instance
 * itself is returned, not a copy.
 *
 * @return the {@code set} field of this doc set
 */
public OpenBitSet set() {
    return this.set;
}
/**
 * Tests whether the given document is part of this doc set by delegating
 * to {@code fastGet} on the held bit set.
 *
 * @param doc the document id to test
 * @return the result of {@code set.fastGet(doc)}
 */
@Override
public boolean get(int doc) throws IOException {
    return this.set.fastGet(doc);
}

View File

@ -0,0 +1,218 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.OpenBitDocSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* @author kimchy (shay.banon)
*/
// LUCENE MONITOR: added to take into account DocSet that wraps OpenBitSet when optimizing or/and/...
public class XBooleanFilter extends Filter {
ArrayList<Filter> shouldFilters = null;
ArrayList<Filter> notFilters = null;
ArrayList<Filter> mustFilters = null;
private DocIdSetIterator getDISI(ArrayList<Filter> filters, int index, IndexReader reader)
throws IOException {
return filters.get(index).getDocIdSet(reader).iterator();
}
public List<Filter> getShouldFilters() {
return this.shouldFilters;
}
public List<Filter> getMustFilters() {
return this.mustFilters;
}
public List<Filter> getNotFilters() {
return this.notFilters;
}
/**
* Returns the a DocIdSetIterator representing the Boolean composition
* of the filters that have been added.
*/
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
OpenBitSetDISI res = null;
if (shouldFilters != null) {
for (int i = 0; i < shouldFilters.size(); i++) {
if (res == null) {
res = new OpenBitSetDISI(getDISI(shouldFilters, i, reader), reader.maxDoc());
} else {
DocIdSet dis = shouldFilters.get(i).getDocIdSet(reader);
if (dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.or((OpenBitSet) dis);
} else if (dis instanceof OpenBitDocSet) {
res.or(((OpenBitDocSet) dis).set());
} else {
res.inPlaceOr(getDISI(shouldFilters, i, reader));
}
}
}
}
if (notFilters != null) {
for (int i = 0; i < notFilters.size(); i++) {
if (res == null) {
res = new OpenBitSetDISI(getDISI(notFilters, i, reader), reader.maxDoc());
res.flip(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
} else {
DocIdSet dis = notFilters.get(i).getDocIdSet(reader);
if (dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.andNot((OpenBitSet) dis);
} else if (dis instanceof OpenBitDocSet) {
res.andNot(((OpenBitDocSet) dis).set());
} else {
res.inPlaceNot(getDISI(notFilters, i, reader));
}
}
}
}
if (mustFilters != null) {
for (int i = 0; i < mustFilters.size(); i++) {
if (res == null) {
res = new OpenBitSetDISI(getDISI(mustFilters, i, reader), reader.maxDoc());
} else {
DocIdSet dis = mustFilters.get(i).getDocIdSet(reader);
if (dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.and((OpenBitSet) dis);
} else if (dis instanceof OpenBitDocSet) {
res.and(((OpenBitDocSet) dis).set());
} else {
res.inPlaceAnd(getDISI(mustFilters, i, reader));
}
}
}
}
if (res != null)
return new OpenBitDocSet(res);
return DocSet.EMPTY_DOC_SET;
}
/**
* Provide a SortedVIntList when it is definitely smaller
* than an OpenBitSet.
*
* @deprecated Either use CachingWrapperFilter, or
* switch to a different DocIdSet implementation yourself.
* This method will be removed in Lucene 4.0
*/
protected final DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
return result;
}
/**
* Adds a new FilterClause to the Boolean Filter container
*
* @param filterClause A FilterClause object containing a Filter and an Occur parameter
*/
public void add(FilterClause filterClause) {
if (filterClause.getOccur().equals(BooleanClause.Occur.MUST)) {
if (mustFilters == null) {
mustFilters = new ArrayList<Filter>();
}
mustFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(BooleanClause.Occur.SHOULD)) {
if (shouldFilters == null) {
shouldFilters = new ArrayList<Filter>();
}
shouldFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
if (notFilters == null) {
notFilters = new ArrayList<Filter>();
}
notFilters.add(filterClause.getFilter());
}
}
private boolean equalFilters(ArrayList<Filter> filters1, ArrayList<Filter> filters2) {
return (filters1 == filters2) ||
((filters1 != null) && filters1.equals(filters2));
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if ((obj == null) || (obj.getClass() != this.getClass()))
return false;
XBooleanFilter other = (XBooleanFilter) obj;
return equalFilters(notFilters, other.notFilters)
&& equalFilters(mustFilters, other.mustFilters)
&& equalFilters(shouldFilters, other.shouldFilters);
}
@Override
public int hashCode() {
int hash = 7;
hash = 31 * hash + (null == mustFilters ? 0 : mustFilters.hashCode());
hash = 31 * hash + (null == notFilters ? 0 : notFilters.hashCode());
hash = 31 * hash + (null == shouldFilters ? 0 : shouldFilters.hashCode());
return hash;
}
/**
* Prints a user-readable version of this query.
*/
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("BooleanFilter(");
appendFilters(shouldFilters, "", buffer);
appendFilters(mustFilters, "+", buffer);
appendFilters(notFilters, "-", buffer);
buffer.append(")");
return buffer.toString();
}
private void appendFilters(ArrayList<Filter> filters, String occurString, StringBuilder buffer) {
if (filters != null) {
for (int i = 0; i < filters.size(); i++) {
buffer.append(' ');
buffer.append(occurString);
buffer.append(filters.get(i).toString());
}
}
}
}

View File

@ -19,8 +19,11 @@
package org.elasticsearch.index.query.xcontent;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.OpenFilterClause;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.AbstractIndexComponent;
@ -92,7 +95,7 @@ public class BoolFilterParser extends AbstractIndexComponent implements XContent
}
}
BooleanFilter filter = new PublicBooleanFilter();
XBooleanFilter filter = new XBooleanFilter();
for (OpenFilterClause filterClause : clauses) {
if (cache) {

View File

@ -21,7 +21,10 @@ package org.elasticsearch.index.shard.service;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.ThreadInterruptedException;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
@ -30,6 +33,7 @@ import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
@ -517,7 +521,7 @@ public class InternalIndexShard extends AbstractIndexShardComponent implements I
}
query = new FilteredQuery(query, indexCache.filter().cache(docMapper.typeFilter()));
} else {
BooleanFilter booleanFilter = new BooleanFilter();
XBooleanFilter booleanFilter = new XBooleanFilter();
for (String type : types) {
DocumentMapper docMapper = mapperService.documentMapper(type);
if (docMapper == null) {

View File

@ -25,6 +25,7 @@ import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.lucene.search.NoopCollector;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchPhase;
@ -67,7 +68,7 @@ public class FacetsPhase implements SearchPhase {
DocumentMapper docMapper = context.mapperService().documentMapper(type);
query = new FilteredQuery(query, context.filterCache().cache(docMapper.typeFilter()));
} else {
BooleanFilter booleanFilter = new BooleanFilter();
XBooleanFilter booleanFilter = new XBooleanFilter();
for (String type : context.types()) {
DocumentMapper docMapper = context.mapperService().documentMapper(type);
booleanFilter.add(new FilterClause(context.filterCache().cache(docMapper.typeFilter()), BooleanClause.Occur.SHOULD));

View File

@ -22,6 +22,7 @@ package org.elasticsearch.search.query;
import org.apache.lucene.search.*;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.function.BoostScoreFunction;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.index.Index;
@ -88,7 +89,7 @@ public class QueryPhase implements SearchPhase {
}
query = new FilteredQuery(query, searchContext.filterCache().cache(docMapper.typeFilter()));
} else {
BooleanFilter booleanFilter = new BooleanFilter();
XBooleanFilter booleanFilter = new XBooleanFilter();
for (String type : searchContext.types()) {
DocumentMapper docMapper = searchContext.mapperService().documentMapper(type);
if (docMapper == null) {

View File

@ -555,7 +555,7 @@ public class SimpleIndexQueryParserTests {
Query parsedQuery = queryParser.parse(query).query();
assertThat(parsedQuery, instanceOf(FilteredQuery.class));
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
BooleanFilter booleanFilter = (BooleanFilter) filteredQuery.getFilter();
XBooleanFilter booleanFilter = (XBooleanFilter) filteredQuery.getFilter();
// TODO get the content and test
}