terms filter uses less memory when cached
move from a TreeSet to an array, sorting on creation
This commit is contained in:
parent
72646fdfea
commit
ccea825966
|
@ -0,0 +1,108 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermDocs;
|
||||||
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Similar to {@link TermsFilter} but stores the terms in an array for better memory usage
|
||||||
|
* when cached, and also uses bulk read
|
||||||
|
*/
|
||||||
|
// LUCENE MONITOR: Against TermsFilter
|
||||||
|
public class XTermsFilter extends Filter {
|
||||||
|
|
||||||
|
private final Term[] terms;
|
||||||
|
|
||||||
|
public XTermsFilter(Term term) {
|
||||||
|
this.terms = new Term[]{term};
|
||||||
|
}
|
||||||
|
|
||||||
|
public XTermsFilter(Term[] terms) {
|
||||||
|
Arrays.sort(terms);
|
||||||
|
this.terms = terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Term[] getTerms() {
|
||||||
|
return terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (this == obj)
|
||||||
|
return true;
|
||||||
|
if ((obj == null) || (obj.getClass() != this.getClass()))
|
||||||
|
return false;
|
||||||
|
XTermsFilter test = (XTermsFilter) obj;
|
||||||
|
return Arrays.equals(terms, test.terms);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Arrays.hashCode(terms);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
FixedBitSet result = null;
|
||||||
|
TermDocs td = reader.termDocs();
|
||||||
|
try {
|
||||||
|
// batch read, in Lucene 4.0 its no longer needed
|
||||||
|
int[] docs = new int[32];
|
||||||
|
int[] freqs = new int[32];
|
||||||
|
for (Term term : terms) {
|
||||||
|
td.seek(term);
|
||||||
|
int number = td.read(docs, freqs);
|
||||||
|
if (number > 0) {
|
||||||
|
if (result == null) {
|
||||||
|
result = new FixedBitSet(reader.maxDoc());
|
||||||
|
}
|
||||||
|
while (number > 0) {
|
||||||
|
for (int i = 0; i < number; i++) {
|
||||||
|
result.set(docs[i]);
|
||||||
|
}
|
||||||
|
number = td.read(docs, freqs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
td.close();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
for (Term term : terms) {
|
||||||
|
if (builder.length() > 0) {
|
||||||
|
builder.append(' ');
|
||||||
|
}
|
||||||
|
builder.append(term);
|
||||||
|
}
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -105,8 +105,8 @@ public class CustomFieldQuery extends FieldQuery {
|
||||||
}
|
}
|
||||||
if (sourceFilter instanceof TermFilter) {
|
if (sourceFilter instanceof TermFilter) {
|
||||||
flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), reader, flatQueries);
|
flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), reader, flatQueries);
|
||||||
} else if (sourceFilter instanceof PublicTermsFilter) {
|
} else if (sourceFilter instanceof XTermsFilter) {
|
||||||
PublicTermsFilter termsFilter = (PublicTermsFilter) sourceFilter;
|
XTermsFilter termsFilter = (XTermsFilter) sourceFilter;
|
||||||
for (Term term : termsFilter.getTerms()) {
|
for (Term term : termsFilter.getTerms()) {
|
||||||
flatten(new TermQuery(term), reader, flatQueries);
|
flatten(new TermQuery(term), reader, flatQueries);
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.Filter;
|
import org.apache.lucene.search.Filter;
|
||||||
import org.apache.lucene.search.FilterClause;
|
import org.apache.lucene.search.FilterClause;
|
||||||
import org.apache.lucene.search.PublicTermsFilter;
|
import org.apache.lucene.search.XTermsFilter;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.io.Streams;
|
import org.elasticsearch.common.io.Streams;
|
||||||
|
@ -336,11 +336,11 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (useTermsFilter) {
|
if (useTermsFilter) {
|
||||||
PublicTermsFilter termsFilter = new PublicTermsFilter();
|
Term[] typesTerms = new Term[types.length];
|
||||||
for (String type : types) {
|
for (int i = 0; i < typesTerms.length; i++) {
|
||||||
termsFilter.addTerm(TypeFieldMapper.TERM_FACTORY.createTerm(type));
|
typesTerms[i] = TypeFieldMapper.TERM_FACTORY.createTerm(types[i]);
|
||||||
}
|
}
|
||||||
return termsFilter;
|
return new XTermsFilter(typesTerms);
|
||||||
} else {
|
} else {
|
||||||
XBooleanFilter bool = new XBooleanFilter();
|
XBooleanFilter bool = new XBooleanFilter();
|
||||||
for (String type : types) {
|
for (String type : types) {
|
||||||
|
@ -355,27 +355,6 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* A filter to filter based on several types.
|
|
||||||
*/
|
|
||||||
public Filter typesFilterFailOnMissing(String... types) throws TypeMissingException {
|
|
||||||
if (types.length == 1) {
|
|
||||||
DocumentMapper docMapper = documentMapper(types[0]);
|
|
||||||
if (docMapper == null) {
|
|
||||||
throw new TypeMissingException(index, types[0]);
|
|
||||||
}
|
|
||||||
return docMapper.typeFilter();
|
|
||||||
}
|
|
||||||
PublicTermsFilter termsFilter = new PublicTermsFilter();
|
|
||||||
for (String type : types) {
|
|
||||||
if (!hasMapping(type)) {
|
|
||||||
throw new TypeMissingException(index, type);
|
|
||||||
}
|
|
||||||
termsFilter.addTerm(TypeFieldMapper.TERM_FACTORY.createTerm(type));
|
|
||||||
}
|
|
||||||
return termsFilter;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns {@link FieldMappers} for all the {@link FieldMapper}s that are registered
|
* Returns {@link FieldMappers} for all the {@link FieldMapper}s that are registered
|
||||||
* under the given name across all the different {@link DocumentMapper} types.
|
* under the given name across all the different {@link DocumentMapper} types.
|
||||||
|
|
|
@ -24,8 +24,8 @@ import org.apache.lucene.document.Fieldable;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.Filter;
|
import org.apache.lucene.search.Filter;
|
||||||
import org.apache.lucene.search.PublicTermsFilter;
|
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.XTermsFilter;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
|
@ -199,11 +199,12 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
|
||||||
return super.fieldFilter(value, context);
|
return super.fieldFilter(value, context);
|
||||||
}
|
}
|
||||||
// we use all types, cause we don't know if its exact or not...
|
// we use all types, cause we don't know if its exact or not...
|
||||||
PublicTermsFilter filter = new PublicTermsFilter();
|
Term[] typesTerms = new Term[context.mapperService().types().size()];
|
||||||
|
int i = 0;
|
||||||
for (String type : context.mapperService().types()) {
|
for (String type : context.mapperService().types()) {
|
||||||
filter.addTerm(names.createIndexNameTerm(Uid.createUid(type, value)));
|
typesTerms[i++] = names.createIndexNameTerm(Uid.createUid(type, value));
|
||||||
}
|
}
|
||||||
return filter;
|
return new XTermsFilter(typesTerms);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -22,7 +22,7 @@ package org.elasticsearch.index.query;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.Filter;
|
import org.apache.lucene.search.Filter;
|
||||||
import org.apache.lucene.search.PublicTermsFilter;
|
import org.apache.lucene.search.XTermsFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.lucene.search.AndFilter;
|
import org.elasticsearch.common.lucene.search.AndFilter;
|
||||||
import org.elasticsearch.common.lucene.search.TermFilter;
|
import org.elasticsearch.common.lucene.search.TermFilter;
|
||||||
|
@ -115,17 +115,17 @@ public class TermsFilterParser implements FilterParser {
|
||||||
try {
|
try {
|
||||||
Filter filter;
|
Filter filter;
|
||||||
if ("plain".equals(execution)) {
|
if ("plain".equals(execution)) {
|
||||||
PublicTermsFilter termsFilter = new PublicTermsFilter();
|
Term[] filterTerms = new Term[terms.size()];
|
||||||
if (fieldMapper != null) {
|
if (fieldMapper != null) {
|
||||||
for (String term : terms) {
|
for (int i = 0; i < filterTerms.length; i++) {
|
||||||
termsFilter.addTerm(fieldMapper.names().createIndexNameTerm(fieldMapper.indexedValue(term)));
|
filterTerms[i] = fieldMapper.names().createIndexNameTerm(fieldMapper.indexedValue(terms.get(i)));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (String term : terms) {
|
for (int i = 0; i < filterTerms.length; i++) {
|
||||||
termsFilter.addTerm(new Term(fieldName, term));
|
filterTerms[i] = new Term(fieldName, terms.get(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
filter = termsFilter;
|
filter = new XTermsFilter(filterTerms);
|
||||||
// cache the whole filter by default, or if explicitly told to
|
// cache the whole filter by default, or if explicitly told to
|
||||||
if (cache == null || cache) {
|
if (cache == null || cache) {
|
||||||
filter = parseContext.cacheFilter(filter, cacheKey);
|
filter = parseContext.cacheFilter(filter, cacheKey);
|
||||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.PublicTermsFilter;
|
import org.apache.lucene.search.XTermsFilter;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
@ -95,20 +95,19 @@ public class TermsFilterTests {
|
||||||
IndexReader reader = w.getReader();
|
IndexReader reader = w.getReader();
|
||||||
w.close();
|
w.close();
|
||||||
|
|
||||||
PublicTermsFilter tf = new PublicTermsFilter();
|
XTermsFilter tf = new XTermsFilter(new Term[]{new Term(fieldName, "19")});
|
||||||
tf.addTerm(new Term(fieldName, "19"));
|
|
||||||
FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader);
|
FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
assertThat(bits, nullValue());
|
assertThat(bits, nullValue());
|
||||||
|
|
||||||
tf.addTerm(new Term(fieldName, "20"));
|
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20")});
|
||||||
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
assertThat(bits.cardinality(), equalTo(1));
|
assertThat(bits.cardinality(), equalTo(1));
|
||||||
|
|
||||||
tf.addTerm(new Term(fieldName, "10"));
|
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10")});
|
||||||
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
assertThat(bits.cardinality(), equalTo(2));
|
assertThat(bits.cardinality(), equalTo(2));
|
||||||
|
|
||||||
tf.addTerm(new Term(fieldName, "00"));
|
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10"), new Term(fieldName, "00")});
|
||||||
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
assertThat(bits.cardinality(), equalTo(2));
|
assertThat(bits.cardinality(), equalTo(2));
|
||||||
|
|
||||||
|
|
|
@ -1114,10 +1114,10 @@ public class SimpleIndexQueryParserTests {
|
||||||
Query parsedQuery = queryParser.parse(filteredQuery(termQuery("name.first", "shay"), termsFilter("name.last", "banon", "kimchy"))).query();
|
Query parsedQuery = queryParser.parse(filteredQuery(termQuery("name.first", "shay"), termsFilter("name.last", "banon", "kimchy"))).query();
|
||||||
assertThat(parsedQuery, instanceOf(FilteredQuery.class));
|
assertThat(parsedQuery, instanceOf(FilteredQuery.class));
|
||||||
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
|
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
|
||||||
assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
|
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
|
||||||
PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
|
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
|
||||||
assertThat(termsFilter.getTerms().size(), equalTo(2));
|
assertThat(termsFilter.getTerms().length, equalTo(2));
|
||||||
assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
|
assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1128,10 +1128,10 @@ public class SimpleIndexQueryParserTests {
|
||||||
Query parsedQuery = queryParser.parse(query).query();
|
Query parsedQuery = queryParser.parse(query).query();
|
||||||
assertThat(parsedQuery, instanceOf(FilteredQuery.class));
|
assertThat(parsedQuery, instanceOf(FilteredQuery.class));
|
||||||
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
|
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery;
|
||||||
assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
|
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
|
||||||
PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
|
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
|
||||||
assertThat(termsFilter.getTerms().size(), equalTo(2));
|
assertThat(termsFilter.getTerms().length, equalTo(2));
|
||||||
assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
|
assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -1142,10 +1142,10 @@ public class SimpleIndexQueryParserTests {
|
||||||
assertThat(parsedQuery.namedFilters().containsKey("test"), equalTo(true));
|
assertThat(parsedQuery.namedFilters().containsKey("test"), equalTo(true));
|
||||||
assertThat(parsedQuery.query(), instanceOf(FilteredQuery.class));
|
assertThat(parsedQuery.query(), instanceOf(FilteredQuery.class));
|
||||||
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery.query();
|
FilteredQuery filteredQuery = (FilteredQuery) parsedQuery.query();
|
||||||
assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class));
|
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
|
||||||
PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter();
|
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
|
||||||
assertThat(termsFilter.getTerms().size(), equalTo(2));
|
assertThat(termsFilter.getTerms().length, equalTo(2));
|
||||||
assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon"));
|
assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue