Query DSL: term/terms filter performance improvement (bulk reading), closes #1972.
This commit is contained in:
parent
86cd95aee2
commit
f87632fabd
|
@ -76,15 +76,21 @@ public class PublicTermsFilter extends Filter {
|
||||||
FixedBitSet result = null;
|
FixedBitSet result = null;
|
||||||
TermDocs td = reader.termDocs();
|
TermDocs td = reader.termDocs();
|
||||||
try {
|
try {
|
||||||
|
// batch read; in Lucene 4.0 it's no longer needed
|
||||||
|
int[] docs = new int[32];
|
||||||
|
int[] freqs = new int[32];
|
||||||
for (Term term : terms) {
|
for (Term term : terms) {
|
||||||
td.seek(term);
|
td.seek(term);
|
||||||
if (td.next()) {
|
int number = td.read(docs, freqs);
|
||||||
|
if (number > 0) {
|
||||||
if (result == null) {
|
if (result == null) {
|
||||||
result = new FixedBitSet(reader.maxDoc());
|
result = new FixedBitSet(reader.maxDoc());
|
||||||
}
|
}
|
||||||
result.set(td.doc());
|
while (number > 0) {
|
||||||
while (td.next()) {
|
for (int i = 0; i < number; i++) {
|
||||||
result.set(td.doc());
|
result.set(docs[i]);
|
||||||
|
}
|
||||||
|
number = td.read(docs, freqs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -97,8 +103,8 @@ public class PublicTermsFilter extends Filter {
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder builder = new StringBuilder();
|
StringBuilder builder = new StringBuilder();
|
||||||
for(Term term: terms) {
|
for (Term term : terms) {
|
||||||
if(builder.length() > 0) {
|
if (builder.length() > 0) {
|
||||||
builder.append(' ');
|
builder.append(' ');
|
||||||
}
|
}
|
||||||
builder.append(term);
|
builder.append(term);
|
||||||
|
|
|
@ -30,8 +30,6 @@ import java.io.IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A simple filter for a specific term.
|
* A simple filter for a specific term.
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class TermFilter extends Filter {
|
public class TermFilter extends Filter {
|
||||||
|
|
||||||
|
@ -51,11 +49,17 @@ public class TermFilter extends Filter {
|
||||||
TermDocs td = reader.termDocs();
|
TermDocs td = reader.termDocs();
|
||||||
try {
|
try {
|
||||||
td.seek(term);
|
td.seek(term);
|
||||||
if (td.next()) {
|
// batch read; in Lucene 4.0 it's no longer needed
|
||||||
|
int[] docs = new int[32];
|
||||||
|
int[] freqs = new int[32];
|
||||||
|
int number = td.read(docs, freqs);
|
||||||
|
if (number > 0) {
|
||||||
result = new FixedBitSet(reader.maxDoc());
|
result = new FixedBitSet(reader.maxDoc());
|
||||||
result.set(td.doc());
|
while (number > 0) {
|
||||||
while (td.next()) {
|
for (int i = 0; i < number; i++) {
|
||||||
result.set(td.doc());
|
result.set(docs[i]);
|
||||||
|
}
|
||||||
|
number = td.read(docs, freqs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -0,0 +1,118 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.test.unit.common.lucene.search;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.KeywordAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.PublicTermsFilter;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
|
import org.elasticsearch.common.lucene.search.TermFilter;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import static org.hamcrest.MatcherAssert.assertThat;
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
import static org.hamcrest.Matchers.nullValue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public class TermsFilterTests {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTermFilter() throws Exception {
|
||||||
|
String fieldName = "field1";
|
||||||
|
Directory rd = new RAMDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(rd, new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer()));
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
int term = i * 10; //terms are units of 10;
|
||||||
|
doc.add(new Field(fieldName, "" + term, Field.Store.NO, Field.Index.NOT_ANALYZED));
|
||||||
|
doc.add(new Field("all", "xxx", Field.Store.NO, Field.Index.NOT_ANALYZED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
if ((i % 40) == 0) {
|
||||||
|
w.commit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IndexReader reader = w.getReader();
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
TermFilter tf = new TermFilter(new Term(fieldName, "19"));
|
||||||
|
FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
|
assertThat(bits, nullValue());
|
||||||
|
|
||||||
|
tf = new TermFilter(new Term(fieldName, "20"));
|
||||||
|
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
|
assertThat(bits.cardinality(), equalTo(1));
|
||||||
|
|
||||||
|
tf = new TermFilter(new Term("all", "xxx"));
|
||||||
|
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
|
assertThat(bits.cardinality(), equalTo(100));
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
rd.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTermsFilter() throws Exception {
|
||||||
|
String fieldName = "field1";
|
||||||
|
Directory rd = new RAMDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(rd, new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer()));
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
int term = i * 10; //terms are units of 10;
|
||||||
|
doc.add(new Field(fieldName, "" + term, Field.Store.NO, Field.Index.NOT_ANALYZED));
|
||||||
|
doc.add(new Field("all", "xxx", Field.Store.NO, Field.Index.NOT_ANALYZED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
if ((i % 40) == 0) {
|
||||||
|
w.commit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IndexReader reader = w.getReader();
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
PublicTermsFilter tf = new PublicTermsFilter();
|
||||||
|
tf.addTerm(new Term(fieldName, "19"));
|
||||||
|
FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
|
assertThat(bits, nullValue());
|
||||||
|
|
||||||
|
tf.addTerm(new Term(fieldName, "20"));
|
||||||
|
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
|
assertThat(bits.cardinality(), equalTo(1));
|
||||||
|
|
||||||
|
tf.addTerm(new Term(fieldName, "10"));
|
||||||
|
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
|
assertThat(bits.cardinality(), equalTo(2));
|
||||||
|
|
||||||
|
tf.addTerm(new Term(fieldName, "00"));
|
||||||
|
bits = (FixedBitSet) tf.getDocIdSet(reader);
|
||||||
|
assertThat(bits.cardinality(), equalTo(2));
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
rd.close();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue