terms filter uses less memory when cached

move from a TreeSet to an array, sorting on creation
This commit is contained in:
Shay Banon 2012-06-07 23:34:21 +02:00
parent 72646fdfea
commit ccea825966
7 changed files with 144 additions and 57 deletions

View File

@ -0,0 +1,108 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.util.FixedBitSet;
import java.io.IOException;
import java.util.Arrays;
/**
 * Similar to {@link TermsFilter} but stores the terms in a sorted array for better memory usage
 * when cached, and also uses bulk reads of the postings.
 * <p>
 * The term array is copied on the way in and on the way out, so the filter's
 * {@link #equals(Object)} and {@link #hashCode()} cannot change after construction. This matters
 * because the filter is meant to be usable as a cache key.
 */
// LUCENE MONITOR: Against TermsFilter
public class XTermsFilter extends Filter {

    /** Number of doc ids requested from {@link TermDocs#read(int[], int[])} per bulk call. */
    private static final int READ_BATCH_SIZE = 32;

    /** The terms to match, sorted; never exposed directly to callers. */
    private final Term[] terms;

    /**
     * Creates a filter matching documents that contain the given single term.
     *
     * @param term the term to match
     */
    public XTermsFilter(Term term) {
        this.terms = new Term[]{term};
    }

    /**
     * Creates a filter matching documents that contain at least one of the given terms.
     * <p>
     * The array is copied before being sorted, so the caller's array is neither reordered
     * nor retained by this filter.
     *
     * @param terms the terms to match
     * @throws NullPointerException if {@code terms} is {@code null}
     */
    public XTermsFilter(Term[] terms) {
        // Defensive copy: sorting in place would reorder the caller's array, and keeping a
        // reference to it would let later outside writes alter equals()/hashCode().
        Term[] sorted = terms.clone();
        Arrays.sort(sorted);
        this.terms = sorted;
    }

    /**
     * Returns the terms of this filter.
     *
     * @return a copy of the internal term array; modifying it does not affect this filter
     */
    public Term[] getTerms() {
        return terms.clone();
    }

    /**
     * Two filters are equal when they are of the exact same class and hold equal terms
     * in the same order.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if ((obj == null) || (obj.getClass() != this.getClass())) {
            return false;
        }
        XTermsFilter test = (XTermsFilter) obj;
        return Arrays.equals(terms, test.terms);
    }

    @Override
    public int hashCode() {
        return Arrays.hashCode(terms);
    }

    /**
     * Collects the documents of the given reader that contain at least one of the terms.
     *
     * @param reader the reader to collect matching documents from
     * @return a {@link FixedBitSet} sized to {@code reader.maxDoc()} with a bit set for every
     *         document read for any of the terms, or {@code null} if no document was read at all
     * @throws IOException if reading the postings fails
     */
    @Override
    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
        FixedBitSet result = null;
        TermDocs td = reader.termDocs();
        try {
            // batch read, in Lucene 4.0 it's no longer needed
            int[] docs = new int[READ_BATCH_SIZE];
            int[] freqs = new int[READ_BATCH_SIZE]; // required by the read API, values are unused
            for (Term term : terms) {
                td.seek(term);
                int number;
                while ((number = td.read(docs, freqs)) > 0) {
                    // allocate lazily so that "nothing matched" is reported as null
                    if (result == null) {
                        result = new FixedBitSet(reader.maxDoc());
                    }
                    for (int i = 0; i < number; i++) {
                        result.set(docs[i]);
                    }
                }
            }
        } finally {
            td.close();
        }
        return result;
    }

    /**
     * Returns the terms of this filter separated by single spaces.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (Term term : terms) {
            if (builder.length() > 0) {
                builder.append(' ');
            }
            builder.append(term);
        }
        return builder.toString();
    }
}

View File

@ -105,8 +105,8 @@ public class CustomFieldQuery extends FieldQuery {
}
if (sourceFilter instanceof TermFilter) {
flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), reader, flatQueries);
} else if (sourceFilter instanceof PublicTermsFilter) {
PublicTermsFilter termsFilter = (PublicTermsFilter) sourceFilter;
} else if (sourceFilter instanceof XTermsFilter) {
XTermsFilter termsFilter = (XTermsFilter) sourceFilter;
for (Term term : termsFilter.getTerms()) {
flatten(new TermQuery(term), reader, flatQueries);
}

View File

@ -31,7 +31,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.search.PublicTermsFilter;
import org.apache.lucene.search.XTermsFilter;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.Streams;
@ -336,11 +336,11 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
}
}
if (useTermsFilter) {
PublicTermsFilter termsFilter = new PublicTermsFilter();
for (String type : types) {
termsFilter.addTerm(TypeFieldMapper.TERM_FACTORY.createTerm(type));
Term[] typesTerms = new Term[types.length];
for (int i = 0; i < typesTerms.length; i++) {
typesTerms[i] = TypeFieldMapper.TERM_FACTORY.createTerm(types[i]);
}
return termsFilter;
return new XTermsFilter(typesTerms);
} else {
XBooleanFilter bool = new XBooleanFilter();
for (String type : types) {
@ -355,27 +355,6 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
}
}
/**
 * Builds a filter matching documents of any of the given types, failing when a type is not mapped.
 *
 * @param types the type names to filter on
 * @return the type filter of the single document mapper when exactly one type is given,
 *         otherwise a terms filter holding one type term per given type
 * @throws TypeMissingException if one of the given types has no mapping
 */
public Filter typesFilterFailOnMissing(String... types) throws TypeMissingException {
    if (types.length != 1) {
        // zero or several types: one terms filter over all the type terms
        PublicTermsFilter multiTypeFilter = new PublicTermsFilter();
        for (int i = 0; i < types.length; i++) {
            String typeName = types[i];
            if (!hasMapping(typeName)) {
                throw new TypeMissingException(index, typeName);
            }
            multiTypeFilter.addTerm(TypeFieldMapper.TERM_FACTORY.createTerm(typeName));
        }
        return multiTypeFilter;
    }
    // exactly one type: reuse the filter provided by its document mapper
    String onlyType = types[0];
    DocumentMapper mapper = documentMapper(onlyType);
    if (mapper == null) {
        throw new TypeMissingException(index, onlyType);
    }
    return mapper.typeFilter();
}
/**
* Returns {@link FieldMappers} for all the {@link FieldMapper}s that are registered
* under the given name across all the different {@link DocumentMapper} types.

View File

@ -24,8 +24,8 @@ import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.PublicTermsFilter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.XTermsFilter;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.Lucene;
@ -199,11 +199,12 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
return super.fieldFilter(value, context);
}
// we use all types, because we don't know if it's exact or not...
PublicTermsFilter filter = new PublicTermsFilter();
Term[] typesTerms = new Term[context.mapperService().types().size()];
int i = 0;
for (String type : context.mapperService().types()) {
filter.addTerm(names.createIndexNameTerm(Uid.createUid(type, value)));
typesTerms[i++] = names.createIndexNameTerm(Uid.createUid(type, value));
}
return filter;
return new XTermsFilter(typesTerms);
}
/**

View File

@ -22,7 +22,7 @@ package org.elasticsearch.index.query;
import com.google.common.collect.Lists;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.PublicTermsFilter;
import org.apache.lucene.search.XTermsFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.AndFilter;
import org.elasticsearch.common.lucene.search.TermFilter;
@ -115,17 +115,17 @@ public class TermsFilterParser implements FilterParser {
try {
Filter filter;
if ("plain".equals(execution)) {
PublicTermsFilter termsFilter = new PublicTermsFilter();
Term[] filterTerms = new Term[terms.size()];
if (fieldMapper != null) {
for (String term : terms) {
termsFilter.addTerm(fieldMapper.names().createIndexNameTerm(fieldMapper.indexedValue(term)));
for (int i = 0; i < filterTerms.length; i++) {
filterTerms[i] = fieldMapper.names().createIndexNameTerm(fieldMapper.indexedValue(terms.get(i)));
}
} else {
for (String term : terms) {
termsFilter.addTerm(new Term(fieldName, term));
for (int i = 0; i < filterTerms.length; i++) {
filterTerms[i] = new Term(fieldName, terms.get(i));
}
}
filter = termsFilter;
filter = new XTermsFilter(filterTerms);
// cache the whole filter by default, or if explicitly told to
if (cache == null || cache) {
filter = parseContext.cacheFilter(filter, cacheKey);

View File

@ -26,7 +26,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PublicTermsFilter;
import org.apache.lucene.search.XTermsFilter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.FixedBitSet;
@ -95,20 +95,19 @@ public class TermsFilterTests {
IndexReader reader = w.getReader();
w.close();
PublicTermsFilter tf = new PublicTermsFilter();
tf.addTerm(new Term(fieldName, "19"));
XTermsFilter tf = new XTermsFilter(new Term[]{new Term(fieldName, "19")});
FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader);
assertThat(bits, nullValue());
tf.addTerm(new Term(fieldName, "20"));
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20")});
bits = (FixedBitSet) tf.getDocIdSet(reader);
assertThat(bits.cardinality(), equalTo(1));
tf.addTerm(new Term(fieldName, "10"));
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10")});
bits = (FixedBitSet) tf.getDocIdSet(reader);
assertThat(bits.cardinality(), equalTo(2));
tf.addTerm(new Term(fieldName, "00"));
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10"), new Term(fieldName, "00")});
bits = (FixedBitSet) tf.getDocIdSet(reader);
assertThat(bits.cardinality(), equalTo(2));

View File

@ -1114,10 +1114,10 @@ public class SimpleIndexQueryParserTests {
Query parsedQuery = queryParser.parse(filteredQuery(termQuery("name.first", "shay"), termsFilter("name.last", "banon", "kimchy"))).query();
assertThat(parsedQuery, instanceOf(FilteredQuery.class));
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
assertThat(termsFilter.getTerms().size(), equalTo(2));
assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
assertThat(termsFilter.getTerms().length, equalTo(2));
assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
}
@ -1128,10 +1128,10 @@ public class SimpleIndexQueryParserTests {
Query parsedQuery = queryParser.parse(query).query();
assertThat(parsedQuery, instanceOf(FilteredQuery.class));
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
assertThat(termsFilter.getTerms().size(), equalTo(2));
assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
assertThat(termsFilter.getTerms().length, equalTo(2));
assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
}
@Test
@ -1142,10 +1142,10 @@ public class SimpleIndexQueryParserTests {
assertThat(parsedQuery.namedFilters().containsKey("test"), equalTo(true));
assertThat(parsedQuery.query(), instanceOf(FilteredQuery.class));
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery.query();
assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
assertThat(termsFilter.getTerms().size(), equalTo(2));
assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
assertThat(termsFilter.getTerms().length, equalTo(2));
assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
}
@Test