diff --git a/src/main/java/org/apache/lucene/search/PublicTermsFilter.java b/src/main/java/org/apache/lucene/search/PublicTermsFilter.java index f7cc68bf914..ce115f4dfb4 100644 --- a/src/main/java/org/apache/lucene/search/PublicTermsFilter.java +++ b/src/main/java/org/apache/lucene/search/PublicTermsFilter.java @@ -76,15 +76,21 @@ public class PublicTermsFilter extends Filter { FixedBitSet result = null; TermDocs td = reader.termDocs(); try { + // batch read, in Lucene 4.0 it's no longer needed + int[] docs = new int[32]; + int[] freqs = new int[32]; for (Term term : terms) { td.seek(term); - if (td.next()) { + int number = td.read(docs, freqs); + if (number > 0) { if (result == null) { result = new FixedBitSet(reader.maxDoc()); } - result.set(td.doc()); - while (td.next()) { - result.set(td.doc()); + while (number > 0) { + for (int i = 0; i < number; i++) { + result.set(docs[i]); + } + number = td.read(docs, freqs); } } } @@ -97,8 +103,8 @@ public class PublicTermsFilter extends Filter { @Override public String toString() { StringBuilder builder = new StringBuilder(); - for(Term term: terms) { - if(builder.length() > 0) { + for (Term term : terms) { + if (builder.length() > 0) { builder.append(' '); } builder.append(term); diff --git a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java index d28cdd2ca3b..31daeff7de1 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java @@ -30,8 +30,6 @@ import java.io.IOException; /** * A simple filter for a specific term. 
- * - * */ public class TermFilter extends Filter { @@ -51,11 +49,17 @@ public class TermFilter extends Filter { TermDocs td = reader.termDocs(); try { td.seek(term); - if (td.next()) { + // batch read, in Lucene 4.0 it's no longer needed + int[] docs = new int[32]; + int[] freqs = new int[32]; + int number = td.read(docs, freqs); + if (number > 0) { result = new FixedBitSet(reader.maxDoc()); - result.set(td.doc()); - while (td.next()) { - result.set(td.doc()); + while (number > 0) { + for (int i = 0; i < number; i++) { + result.set(docs[i]); + } + number = td.read(docs, freqs); } } } finally { diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java new file mode 100644 index 00000000000..d3d3b4c0839 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java @@ -0,0 +1,118 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.test.unit.common.lucene.search; + +import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.PublicTermsFilter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.FixedBitSet; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.search.TermFilter; +import org.testng.annotations.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; + +/** + */ +@Test +public class TermsFilterTests { + + @Test + public void testTermFilter() throws Exception { + String fieldName = "field1"; + Directory rd = new RAMDirectory(); + IndexWriter w = new IndexWriter(rd, new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer())); + for (int i = 0; i < 100; i++) { + Document doc = new Document(); + int term = i * 10; // terms are multiples of 10 + doc.add(new Field(fieldName, "" + term, Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("all", "xxx", Field.Store.NO, Field.Index.NOT_ANALYZED)); + w.addDocument(doc); + if ((i % 40) == 0) { + w.commit(); + } + } + IndexReader reader = w.getReader(); + w.close(); + + TermFilter tf = new TermFilter(new Term(fieldName, "19")); + FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader); + assertThat(bits, nullValue()); + + tf = new TermFilter(new Term(fieldName, "20")); + bits = (FixedBitSet) tf.getDocIdSet(reader); + assertThat(bits.cardinality(), equalTo(1)); + + tf = new TermFilter(new Term("all", "xxx")); + bits = (FixedBitSet) tf.getDocIdSet(reader); + assertThat(bits.cardinality(), equalTo(100)); + + 
reader.close(); + rd.close(); + } + + @Test + public void testTermsFilter() throws Exception { + String fieldName = "field1"; + Directory rd = new RAMDirectory(); + IndexWriter w = new IndexWriter(rd, new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer())); + for (int i = 0; i < 100; i++) { + Document doc = new Document(); + int term = i * 10; // terms are multiples of 10 + doc.add(new Field(fieldName, "" + term, Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("all", "xxx", Field.Store.NO, Field.Index.NOT_ANALYZED)); + w.addDocument(doc); + if ((i % 40) == 0) { + w.commit(); + } + } + IndexReader reader = w.getReader(); + w.close(); + + PublicTermsFilter tf = new PublicTermsFilter(); + tf.addTerm(new Term(fieldName, "19")); + FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader); + assertThat(bits, nullValue()); + + tf.addTerm(new Term(fieldName, "20")); + bits = (FixedBitSet) tf.getDocIdSet(reader); + assertThat(bits.cardinality(), equalTo(1)); + + tf.addTerm(new Term(fieldName, "10")); + bits = (FixedBitSet) tf.getDocIdSet(reader); + assertThat(bits.cardinality(), equalTo(2)); + + tf.addTerm(new Term(fieldName, "00")); + bits = (FixedBitSet) tf.getDocIdSet(reader); + assertThat(bits.cardinality(), equalTo(2)); + + reader.close(); + rd.close(); + } +}