From ccea8259669a6f165ee5bb42e4c6dc2561e8c252 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Thu, 7 Jun 2012 23:34:21 +0200 Subject: [PATCH] terms filter uses less memory when cached move from a TreeSet to an array, sorting on creation --- .../apache/lucene/search/XTermsFilter.java | 108 ++++++++++++++++++ .../vectorhighlight/CustomFieldQuery.java | 4 +- .../index/mapper/MapperService.java | 31 +---- .../mapper/internal/ParentFieldMapper.java | 9 +- .../index/query/TermsFilterParser.java | 14 +-- .../lucene/search/TermsFilterTests.java | 11 +- .../query/SimpleIndexQueryParserTests.java | 24 ++-- 7 files changed, 144 insertions(+), 57 deletions(-) create mode 100644 src/main/java/org/apache/lucene/search/XTermsFilter.java diff --git a/src/main/java/org/apache/lucene/search/XTermsFilter.java b/src/main/java/org/apache/lucene/search/XTermsFilter.java new file mode 100644 index 00000000000..c467825c419 --- /dev/null +++ b/src/main/java/org/apache/lucene/search/XTermsFilter.java @@ -0,0 +1,108 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.search; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.util.FixedBitSet; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Similar to {@link TermsFilter} but stores the terms in an array for better memory usage + * when cached, and also uses bulk read + */ +// LUCENE MONITOR: Against TermsFilter +public class XTermsFilter extends Filter { + + private final Term[] terms; + + public XTermsFilter(Term term) { + this.terms = new Term[]{term}; + } + + public XTermsFilter(Term[] terms) { + Arrays.sort(terms); + this.terms = terms; + } + + public Term[] getTerms() { + return terms; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if ((obj == null) || (obj.getClass() != this.getClass())) + return false; + XTermsFilter test = (XTermsFilter) obj; + return Arrays.equals(terms, test.terms); + } + + @Override + public int hashCode() { + return Arrays.hashCode(terms); + } + + @Override + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + FixedBitSet result = null; + TermDocs td = reader.termDocs(); + try { + // batch read, in Lucene 4.0 its no longer needed + int[] docs = new int[32]; + int[] freqs = new int[32]; + for (Term term : terms) { + td.seek(term); + int number = td.read(docs, freqs); + if (number > 0) { + if (result == null) { + result = new FixedBitSet(reader.maxDoc()); + } + while (number > 0) { + for (int i = 0; i < number; i++) { + result.set(docs[i]); + } + number = td.read(docs, freqs); + } + } + } + } finally { + td.close(); + } + return result; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + for (Term term : terms) { + if (builder.length() > 0) { + builder.append(' '); + } + builder.append(term); + } + return builder.toString(); + } + +} diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java index 79c44f73fbf..d158e5004af 100644 --- a/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java +++ b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java @@ -105,8 +105,8 @@ public class CustomFieldQuery extends FieldQuery { } if (sourceFilter instanceof TermFilter) { flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), reader, flatQueries); - } else if (sourceFilter instanceof PublicTermsFilter) { - PublicTermsFilter termsFilter = (PublicTermsFilter) sourceFilter; + } else if (sourceFilter instanceof XTermsFilter) { + XTermsFilter termsFilter = (XTermsFilter) sourceFilter; for (Term term : termsFilter.getTerms()) { flatten(new TermQuery(term), reader, flatQueries); } diff --git a/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/src/main/java/org/elasticsearch/index/mapper/MapperService.java index 78f843255e4..f9e0ac1cbb5 100644 --- a/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -31,7 +31,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Filter; import org.apache.lucene.search.FilterClause; -import org.apache.lucene.search.PublicTermsFilter; +import org.apache.lucene.search.XTermsFilter; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.Streams; @@ -336,11 +336,11 @@ public class MapperService extends AbstractIndexComponent implements Iterable implements Inter return super.fieldFilter(value, context); } // we use all types, cause we don't know if its exact or not... - PublicTermsFilter filter = new PublicTermsFilter(); + Term[] typesTerms = new Term[context.mapperService().types().size()]; + int i = 0; for (String type : context.mapperService().types()) { - filter.addTerm(names.createIndexNameTerm(Uid.createUid(type, value))); + typesTerms[i++] = names.createIndexNameTerm(Uid.createUid(type, value)); } - return filter; + return new XTermsFilter(typesTerms); } /** diff --git a/src/main/java/org/elasticsearch/index/query/TermsFilterParser.java b/src/main/java/org/elasticsearch/index/query/TermsFilterParser.java index e9559819518..269a795190b 100644 --- a/src/main/java/org/elasticsearch/index/query/TermsFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/TermsFilterParser.java @@ -22,7 +22,7 @@ package org.elasticsearch.index.query; import com.google.common.collect.Lists; import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.PublicTermsFilter; +import org.apache.lucene.search.XTermsFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.AndFilter; import org.elasticsearch.common.lucene.search.TermFilter; @@ -115,17 +115,17 @@ public class TermsFilterParser implements FilterParser { try { Filter filter; if ("plain".equals(execution)) { - PublicTermsFilter termsFilter = new PublicTermsFilter(); + Term[] filterTerms = new Term[terms.size()]; if (fieldMapper != null) { - for (String term : terms) { - termsFilter.addTerm(fieldMapper.names().createIndexNameTerm(fieldMapper.indexedValue(term))); + for (int i = 0; i < filterTerms.length; i++) { + filterTerms[i] = fieldMapper.names().createIndexNameTerm(fieldMapper.indexedValue(terms.get(i))); } } else { - for (String term : terms) { - termsFilter.addTerm(new Term(fieldName, term)); + for (int i = 0; i < filterTerms.length; i++) { + filterTerms[i] = new Term(fieldName, terms.get(i)); } } - filter = termsFilter; + filter = new XTermsFilter(filterTerms); // cache the whole filter by default, or if explicitly told to if (cache == null || cache) { filter = parseContext.cacheFilter(filter, cacheKey); diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java index d3d3b4c0839..4e0ef138c36 100644 --- a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java +++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java @@ -26,7 +26,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import org.apache.lucene.search.PublicTermsFilter; +import org.apache.lucene.search.XTermsFilter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.FixedBitSet; @@ -95,20 +95,19 @@ public class TermsFilterTests { IndexReader reader = w.getReader(); w.close(); - PublicTermsFilter tf = new PublicTermsFilter(); - tf.addTerm(new Term(fieldName, "19")); + XTermsFilter tf = new XTermsFilter(new Term[]{new Term(fieldName, "19")}); FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader); assertThat(bits, nullValue()); - tf.addTerm(new Term(fieldName, "20")); + tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20")}); bits = (FixedBitSet) tf.getDocIdSet(reader); assertThat(bits.cardinality(), equalTo(1)); - tf.addTerm(new Term(fieldName, "10")); + tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10")}); bits = (FixedBitSet) tf.getDocIdSet(reader); assertThat(bits.cardinality(), equalTo(2)); - tf.addTerm(new Term(fieldName, "00")); + tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10"), new Term(fieldName, "00")}); bits = (FixedBitSet) tf.getDocIdSet(reader); assertThat(bits.cardinality(), equalTo(2)); diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java index a12c31e9aee..8c4ff964eaf 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java @@ -1114,10 +1114,10 @@ public class SimpleIndexQueryParserTests { Query parsedQuery = queryParser.parse(filteredQuery(termQuery("name.first", "shay"), termsFilter("name.last", "banon", "kimchy"))).query(); assertThat(parsedQuery, instanceOf(FilteredQuery.class)); FilteredQuery filteredQuery = (FilteredQuery) parsedQuery; - assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class)); - PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter(); - assertThat(termsFilter.getTerms().size(), equalTo(2)); - assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon")); + assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class)); + XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter(); + assertThat(termsFilter.getTerms().length, equalTo(2)); + assertThat(termsFilter.getTerms()[0].text(), equalTo("banon")); } @@ -1128,10 +1128,10 @@ public class SimpleIndexQueryParserTests { Query parsedQuery = queryParser.parse(query).query(); assertThat(parsedQuery, instanceOf(FilteredQuery.class)); FilteredQuery filteredQuery = (FilteredQuery) parsedQuery; - assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class)); - PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter(); - assertThat(termsFilter.getTerms().size(), equalTo(2)); - assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon")); + assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class)); + XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter(); + assertThat(termsFilter.getTerms().length, equalTo(2)); + assertThat(termsFilter.getTerms()[0].text(), equalTo("banon")); } @Test @@ -1142,10 +1142,10 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery.namedFilters().containsKey("test"), equalTo(true)); assertThat(parsedQuery.query(), instanceOf(FilteredQuery.class)); FilteredQuery filteredQuery = (FilteredQuery) parsedQuery.query(); - assertThat(filteredQuery.getFilter(), instanceOf(PublicTermsFilter.class)); - PublicTermsFilter termsFilter = (PublicTermsFilter) filteredQuery.getFilter(); - assertThat(termsFilter.getTerms().size(), equalTo(2)); - assertThat(termsFilter.getTerms().iterator().next().text(), equalTo("banon")); + assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class)); + XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter(); + assertThat(termsFilter.getTerms().length, equalTo(2)); + assertThat(termsFilter.getTerms()[0].text(), equalTo("banon")); } @Test