terms filter uses less memory when cached
move from a TreeSet to an array, sorting on creation
This commit is contained in:
parent
72646fdfea
commit
ccea825966
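
The gist of the change, as a minimal sketch: the stock Lucene 3.x TermsFilter keeps its terms in a TreeSet (one tree node per entry), while the new filter keeps a plain Term[] sorted once at construction time. The class name, field, and values below are illustrative only, not taken from the commit.

import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

import org.apache.lucene.index.Term;

public class TermsStorageSketch {
    public static void main(String[] args) {
        // Old approach (Lucene's TermsFilter): a TreeSet keeps the terms ordered,
        // but each entry costs an extra tree node on top of the Term itself.
        Set<Term> treeTerms = new TreeSet<Term>();
        treeTerms.add(new Term("_type", "blog"));   // example field/values, not from the commit
        treeTerms.add(new Term("_type", "user"));

        // New approach (XTermsFilter's constructor): a plain Term[], sorted once,
        // so terms are still visited in order without per-entry node overhead.
        Term[] arrayTerms = new Term[]{new Term("_type", "user"), new Term("_type", "blog")};
        Arrays.sort(arrayTerms);                    // Term is Comparable in Lucene 3.x

        System.out.println(treeTerms + " vs " + Arrays.toString(arrayTerms));
    }
}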
New file: org.apache.lucene.search.XTermsFilter
@@ -0,0 +1,108 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.util.FixedBitSet;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Similar to {@link TermsFilter} but stores the terms in an array for better memory usage
+ * when cached, and also uses bulk reads.
+ */
+// LUCENE MONITOR: Against TermsFilter
+public class XTermsFilter extends Filter {
+
+    private final Term[] terms;
+
+    public XTermsFilter(Term term) {
+        this.terms = new Term[]{term};
+    }
+
+    public XTermsFilter(Term[] terms) {
+        Arrays.sort(terms);
+        this.terms = terms;
+    }
+
+    public Term[] getTerms() {
+        return terms;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if ((obj == null) || (obj.getClass() != this.getClass()))
+            return false;
+        XTermsFilter test = (XTermsFilter) obj;
+        return Arrays.equals(terms, test.terms);
+    }
+
+    @Override
+    public int hashCode() {
+        return Arrays.hashCode(terms);
+    }
+
+    @Override
+    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+        FixedBitSet result = null;
+        TermDocs td = reader.termDocs();
+        try {
+            // batch read; in Lucene 4.0 it's no longer needed
+            int[] docs = new int[32];
+            int[] freqs = new int[32];
+            for (Term term : terms) {
+                td.seek(term);
+                int number = td.read(docs, freqs);
+                if (number > 0) {
+                    if (result == null) {
+                        result = new FixedBitSet(reader.maxDoc());
+                    }
+                    while (number > 0) {
+                        for (int i = 0; i < number; i++) {
+                            result.set(docs[i]);
+                        }
+                        number = td.read(docs, freqs);
+                    }
+                }
+            }
+        } finally {
+            td.close();
+        }
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder builder = new StringBuilder();
+        for (Term term : terms) {
+            if (builder.length() > 0) {
+                builder.append(' ');
+            }
+            builder.append(term);
+        }
+        return builder.toString();
+    }
+}
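
For orientation, a minimal usage sketch of the class above against the Lucene 3.x API it targets. The wrapper class name, field, and values are invented for the example; error handling is elided.

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.XTermsFilter;

public class XTermsFilterUsageSketch {
    // Prints the ids of documents whose "user" field matches either term.
    public static void printMatches(IndexReader reader) throws java.io.IOException {
        XTermsFilter filter = new XTermsFilter(new Term[]{
                new Term("user", "kimchy"),          // example terms, not from the commit
                new Term("user", "banon")
        });
        DocIdSet docs = filter.getDocIdSet(reader);  // null when no term matches any doc
        if (docs == null) {
            return;
        }
        DocIdSetIterator it = docs.iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            System.out.println("matched doc id " + doc);
        }
    }
}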
@@ -105,8 +105,8 @@ public class CustomFieldQuery extends FieldQuery {
         }
         if (sourceFilter instanceof TermFilter) {
             flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), reader, flatQueries);
-        } else if (sourceFilter instanceof PublicTermsFilter) {
-            PublicTermsFilter termsFilter = (PublicTermsFilter) sourceFilter;
+        } else if (sourceFilter instanceof XTermsFilter) {
+            XTermsFilter termsFilter = (XTermsFilter) sourceFilter;
             for (Term term : termsFilter.getTerms()) {
                 flatten(new TermQuery(term), reader, flatQueries);
             }
@@ -31,7 +31,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.FilterClause;
-import org.apache.lucene.search.PublicTermsFilter;
+import org.apache.lucene.search.XTermsFilter;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.io.Streams;
@@ -336,11 +336,11 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
             }
         }
         if (useTermsFilter) {
-            PublicTermsFilter termsFilter = new PublicTermsFilter();
-            for (String type : types) {
-                termsFilter.addTerm(TypeFieldMapper.TERM_FACTORY.createTerm(type));
+            Term[] typesTerms = new Term[types.length];
+            for (int i = 0; i < typesTerms.length; i++) {
+                typesTerms[i] = TypeFieldMapper.TERM_FACTORY.createTerm(types[i]);
             }
-            return termsFilter;
+            return new XTermsFilter(typesTerms);
         } else {
             XBooleanFilter bool = new XBooleanFilter();
             for (String type : types) {
@@ -355,27 +355,6 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
         }
     }
 
-    /**
-     * A filter to filter based on several types.
-     */
-    public Filter typesFilterFailOnMissing(String... types) throws TypeMissingException {
-        if (types.length == 1) {
-            DocumentMapper docMapper = documentMapper(types[0]);
-            if (docMapper == null) {
-                throw new TypeMissingException(index, types[0]);
-            }
-            return docMapper.typeFilter();
-        }
-        PublicTermsFilter termsFilter = new PublicTermsFilter();
-        for (String type : types) {
-            if (!hasMapping(type)) {
-                throw new TypeMissingException(index, type);
-            }
-            termsFilter.addTerm(TypeFieldMapper.TERM_FACTORY.createTerm(type));
-        }
-        return termsFilter;
-    }
-
     /**
      * Returns {@link FieldMappers} for all the {@link FieldMapper}s that are registered
      * under the given name across all the different {@link DocumentMapper} types.
@@ -24,8 +24,8 @@ import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.PublicTermsFilter;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.XTermsFilter;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.lucene.Lucene;
@@ -199,11 +199,12 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
             return super.fieldFilter(value, context);
         }
         // we use all types, cause we don't know if its exact or not...
-        PublicTermsFilter filter = new PublicTermsFilter();
+        Term[] typesTerms = new Term[context.mapperService().types().size()];
+        int i = 0;
         for (String type : context.mapperService().types()) {
-            filter.addTerm(names.createIndexNameTerm(Uid.createUid(type, value)));
+            typesTerms[i++] = names.createIndexNameTerm(Uid.createUid(type, value));
         }
-        return filter;
+        return new XTermsFilter(typesTerms);
     }
 
     /**
@@ -22,7 +22,7 @@ package org.elasticsearch.index.query;
 import com.google.common.collect.Lists;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.PublicTermsFilter;
+import org.apache.lucene.search.XTermsFilter;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.lucene.search.AndFilter;
 import org.elasticsearch.common.lucene.search.TermFilter;
@@ -115,17 +115,17 @@ public class TermsFilterParser implements FilterParser {
         try {
             Filter filter;
             if ("plain".equals(execution)) {
-                PublicTermsFilter termsFilter = new PublicTermsFilter();
+                Term[] filterTerms = new Term[terms.size()];
                 if (fieldMapper != null) {
-                    for (String term : terms) {
-                        termsFilter.addTerm(fieldMapper.names().createIndexNameTerm(fieldMapper.indexedValue(term)));
+                    for (int i = 0; i < filterTerms.length; i++) {
+                        filterTerms[i] = fieldMapper.names().createIndexNameTerm(fieldMapper.indexedValue(terms.get(i)));
                     }
                 } else {
-                    for (String term : terms) {
-                        termsFilter.addTerm(new Term(fieldName, term));
+                    for (int i = 0; i < filterTerms.length; i++) {
+                        filterTerms[i] = new Term(fieldName, terms.get(i));
                     }
                 }
-                filter = termsFilter;
+                filter = new XTermsFilter(filterTerms);
                 // cache the whole filter by default, or if explicitly told to
                 if (cache == null || cache) {
                     filter = parseContext.cacheFilter(filter, cacheKey);
@@ -26,7 +26,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.PublicTermsFilter;
+import org.apache.lucene.search.XTermsFilter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.FixedBitSet;
@@ -95,20 +95,19 @@ public class TermsFilterTests {
         IndexReader reader = w.getReader();
         w.close();
 
-        PublicTermsFilter tf = new PublicTermsFilter();
-        tf.addTerm(new Term(fieldName, "19"));
+        XTermsFilter tf = new XTermsFilter(new Term[]{new Term(fieldName, "19")});
         FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader);
         assertThat(bits, nullValue());
 
-        tf.addTerm(new Term(fieldName, "20"));
+        tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20")});
         bits = (FixedBitSet) tf.getDocIdSet(reader);
         assertThat(bits.cardinality(), equalTo(1));
 
-        tf.addTerm(new Term(fieldName, "10"));
+        tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10")});
         bits = (FixedBitSet) tf.getDocIdSet(reader);
         assertThat(bits.cardinality(), equalTo(2));
 
-        tf.addTerm(new Term(fieldName, "00"));
+        tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10"), new Term(fieldName, "00")});
         bits = (FixedBitSet) tf.getDocIdSet(reader);
         assertThat(bits.cardinality(), equalTo(2));
 
@@ -1114,10 +1114,10 @@ public class SimpleIndexQueryParserTests {
         Query parsedQuery = queryParser.parse(filteredQuery(termQuery("name.first", "shay"), termsFilter("name.last", "banon", "kimchy"))).query();
         assertThat(parsedQuery, instanceOf(FilteredQuery.class));
         FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
-        assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
-        PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
-        assertThat(termsFilter.getTerms().size(), equalTo(2));
-        assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
+        assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
+        XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
+        assertThat(termsFilter.getTerms().length, equalTo(2));
+        assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
     }
 
 
@@ -1128,10 +1128,10 @@ public class SimpleIndexQueryParserTests {
         Query parsedQuery = queryParser.parse(query).query();
         assertThat(parsedQuery, instanceOf(FilteredQuery.class));
         FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
-        assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
-        PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
-        assertThat(termsFilter.getTerms().size(), equalTo(2));
-        assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
+        assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
+        XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
+        assertThat(termsFilter.getTerms().length, equalTo(2));
+        assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
     }
 
     @Test
@@ -1142,10 +1142,10 @@ public class SimpleIndexQueryParserTests {
         assertThat(parsedQuery.namedFilters().containsKey("test"), equalTo(true));
         assertThat(parsedQuery.query(), instanceOf(FilteredQuery.class));
         FilteredQuery filteredQuery = (FilteredQuery) parsedQuery.query();
-        assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
-        PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
-        assertThat(termsFilter.getTerms().size(), equalTo(2));
-        assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
+        assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
+        XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
+        assertThat(termsFilter.getTerms().length, equalTo(2));
+        assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
     }
 
     @Test