lucene 4: cleanup terms/uid filter

This commit is contained in:
Shay Banon 2012-11-02 10:25:56 +01:00
parent 79368bb221
commit 0660e20c47
4 changed files with 114 additions and 123 deletions

View File

@ -28,6 +28,7 @@ import org.elasticsearch.common.lucene.search.NotDeletedFilter;
// So it can basically be cached safely even with a reader that changes deletions but remains with the same cache key
// See more: https://issues.apache.org/jira/browse/LUCENE-2468
// TODO Lucene 4.0 won't need this, since live docs are "and'ed" while scoring
// LUCENE 4 UPGRADE: we probably don't need this anymore, because of acceptDocs
public class DeletionAwareConstantScoreQuery extends ConstantScoreQuery {
private final Filter actualFilter;

View File

@ -19,13 +19,7 @@
package org.apache.lucene.search;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -39,7 +33,8 @@ import java.util.Collection;
* Similar to {@link TermsFilter} but stores the terms in an array for better memory usage
* when cached, and also uses bulk read
*/
// LUCENE MONITOR: Against TermsFilter - this is now identical to TermsFilter once 4.1 is released
// LUCENE 4 UPGRADE: Make sure to sync this against latest 4.1
// LUCENE 4.1: once it's out, we can use TermsFilter from it
public class XTermsFilter extends Filter {
private final Term[] filterTerms;
@ -71,9 +66,9 @@ public class XTermsFilter extends Filter {
boolean fieldChanged = true;
if (index > 0) {
// deduplicate
if (filterTerms[index-1].field().equals(currentTerm.field())) {
if (filterTerms[index - 1].field().equals(currentTerm.field())) {
fieldChanged = false;
if (filterTerms[index-1].bytes().bytesEquals(currentTerm.bytes())){
if (filterTerms[index - 1].bytes().bytesEquals(currentTerm.bytes())) {
continue;
}
}
@ -108,14 +103,14 @@ public class XTermsFilter extends Filter {
if (resetTermsEnum[i]) {
terms = fields.terms(term.field());
if (terms == null) {
i = skipToNextField(i+1, length); // skip to the next field since this field is not indexed
i = skipToNextField(i + 1, length); // skip to the next field since this field is not indexed
continue;
}
}
if ((termsEnum = terms.iterator(termsEnum)) != null) {
br.copyBytes(term.bytes());
assert termsEnum != null;
if (termsEnum.seekExact(br,true)) {
if (termsEnum.seekExact(br, true)) {
docs = termsEnum.docs(acceptDocs, docs, 0);
if (result == null) {
if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
@ -136,7 +131,7 @@ public class XTermsFilter extends Filter {
private final int skipToNextField(int index, int length) {
for (int i = index; i < length; i++) {
if (resetTermsEnum[i]) {
return i-1;
return i - 1;
}
}
return length;

View File

@ -166,7 +166,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
if (indexed() || context == null) {
return super.fieldQuery(value, context);
}
UidFilter filter = new UidFilter(context.queryTypes(), ImmutableList.of(value), context.indexCache().bloomCache());
UidFilter filter = new UidFilter(context.queryTypes(), ImmutableList.of(value));
// no need for constant score filter, since we don't cache the filter, and it always takes deletes into account
return new ConstantScoreQuery(filter);
}
@ -176,7 +176,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
if (indexed() || context == null) {
return super.fieldFilter(value, context);
}
return new UidFilter(context.queryTypes(), ImmutableList.of(value), context.indexCache().bloomCache());
return new UidFilter(context.queryTypes(), ImmutableList.of(value));
}
@Override

View File

@ -19,11 +19,7 @@
package org.elasticsearch.index.search;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
@ -36,9 +32,11 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.List;
// LUCENE 4 UPGRADE: we can potentially use TermsFilter here, specifically now that we no longer do bloom filtering or batching, and with its optimization on single-field terms
public class UidFilter extends Filter {
final Term[] uids;
public UidFilter(Collection<String> types, List<String> ids) {
this.uids = new Term[types.size() * ids.size()];
int i = 0;
@ -60,7 +58,6 @@ public class UidFilter extends Filter {
// - If we have a single id, we can create a SingleIdDocIdSet to save on mem
// - We can use sorted int array DocIdSet to save memory compared to OpenBitSet in some cases
@Override
// LUCENE 4 UPGRADE: this filter does respect acceptDocs; maybe we need to change this
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
FixedBitSet set = null;
final AtomicReader reader = ctx.reader();
@ -71,8 +68,6 @@ public class UidFilter extends Filter {
docsEnum = termsEnum.docs(acceptedDocs, docsEnum, 0);
int doc;
while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
// no need for batching: it's on the UID, so there will be only
// one doc
if (set == null) {
set = new FixedBitSet(reader.maxDoc());
}