LUCENE-3595: Fixed FieldCacheRangeFilter and FieldCacheTermsFilter to correctly respect deletions on reopened SegmentReaders. Factored out FieldCacheDocIdSet to be a top-level class

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206033 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2011-11-25 01:21:53 +00:00
parent d03c59d685
commit 2cf0c05869
6 changed files with 175 additions and 147 deletions

View File

@ -663,6 +663,12 @@ New Features
have at least one or no value at all in a specific field. (Simon Willnauer, have at least one or no value at all in a specific field. (Simon Willnauer,
Uwe Schindler, Robert Muir) Uwe Schindler, Robert Muir)
Bug fixes
* LUCENE-3595: Fixed FieldCacheRangeFilter and FieldCacheTermsFilter
to correctly respect deletions on reopened SegmentReaders. Factored out
FieldCacheDocIdSet to be a top-level class. (Uwe Schindler, Simon Willnauer)
======================= Lucene 3.5.0 ======================= ======================= Lucene 3.5.0 =======================
Changes in backwards compatibility policy Changes in backwards compatibility policy

View File

@ -0,0 +1,153 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSet;
/**
* Base class for DocIdSet to be used with FieldCache. The implementation
* of its iterator is very stupid and slow if the implementation of the
* {@link #matchDoc} method is not optimized, as iterators simply increment
* the document id until {@code matchDoc(int)} returns true. Because of this
* {@code matchDoc(int)} must be as fast as possible and in no case do any
* I/O.
* @lucene.internal
*/
public abstract class FieldCacheDocIdSet extends DocIdSet {
protected final int maxDoc;
protected final Bits acceptDocs;
public FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) {
this.maxDoc = maxDoc;
this.acceptDocs = acceptDocs;
}
/**
* this method checks, if a doc is a hit
*/
protected abstract boolean matchDoc(int doc);
/**
* this DocIdSet is always cacheable (does not go back
* to the reader for iteration)
*/
@Override
public final boolean isCacheable() {
return true;
}
@Override
public final Bits bits() {
return (acceptDocs == null) ? new Bits() {
public boolean get(int docid) {
return matchDoc(docid);
}
public int length() {
return maxDoc;
}
} : new Bits() {
public boolean get(int docid) {
return acceptDocs.get(docid) && matchDoc(docid);
}
public int length() {
return maxDoc;
}
};
}
@Override
public final DocIdSetIterator iterator() throws IOException {
if (acceptDocs == null) {
// Specialization optimization disregard acceptDocs
return new DocIdSetIterator() {
private int doc = -1;
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() {
do {
doc++;
if (doc >= maxDoc) {
return doc = NO_MORE_DOCS;
}
} while (!matchDoc(doc));
return doc;
}
@Override
public int advance(int target) {
for(doc=target; doc<maxDoc; doc++) {
if (matchDoc(doc)) {
return doc;
}
}
return doc = NO_MORE_DOCS;
}
};
} else if (acceptDocs instanceof FixedBitSet || acceptDocs instanceof OpenBitSet) {
// special case for FixedBitSet / OpenBitSet: use the iterator and filter it
// (used e.g. when Filters are chained by FilteredQuery)
return new FilteredDocIdSetIterator(((DocIdSet) acceptDocs).iterator()) {
@Override
protected boolean match(int doc) {
return FieldCacheDocIdSet.this.matchDoc(doc);
}
};
} else {
// Stupid consultation of acceptDocs and matchDoc()
return new DocIdSetIterator() {
private int doc = -1;
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() {
do {
doc++;
if (doc >= maxDoc) {
return doc = NO_MORE_DOCS;
}
} while (!acceptDocs.get(doc) || !matchDoc(doc));
return doc;
}
@Override
public int advance(int target) {
for(doc=target; doc<maxDoc; doc++) {
if (acceptDocs.get(doc) && matchDoc(doc)) {
return doc;
}
}
return doc = NO_MORE_DOCS;
}
};
}
}
}

View File

@ -124,7 +124,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
@Override @Override
final boolean matchDoc(int doc) { protected final boolean matchDoc(int doc) {
final int docOrd = fcsi.getOrd(doc); final int docOrd = fcsi.getOrd(doc);
return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint; return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint;
} }
@ -175,7 +175,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
final byte[] values = FieldCache.DEFAULT.getBytes(context.reader, field, (FieldCache.ByteParser) parser, false); final byte[] values = FieldCache.DEFAULT.getBytes(context.reader, field, (FieldCache.ByteParser) parser, false);
return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
@Override @Override
boolean matchDoc(int doc) { protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
} }
}; };
@ -225,7 +225,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
final short[] values = FieldCache.DEFAULT.getShorts(context.reader, field, (FieldCache.ShortParser) parser, false); final short[] values = FieldCache.DEFAULT.getShorts(context.reader, field, (FieldCache.ShortParser) parser, false);
return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
@Override @Override
boolean matchDoc(int doc) { protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
} }
}; };
@ -275,7 +275,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
final int[] values = FieldCache.DEFAULT.getInts(context.reader, field, (FieldCache.IntParser) parser, false); final int[] values = FieldCache.DEFAULT.getInts(context.reader, field, (FieldCache.IntParser) parser, false);
return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
@Override @Override
boolean matchDoc(int doc) { protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
} }
}; };
@ -325,7 +325,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
final long[] values = FieldCache.DEFAULT.getLongs(context.reader, field, (FieldCache.LongParser) parser, false); final long[] values = FieldCache.DEFAULT.getLongs(context.reader, field, (FieldCache.LongParser) parser, false);
return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
@Override @Override
boolean matchDoc(int doc) { protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
} }
}; };
@ -379,7 +379,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
final float[] values = FieldCache.DEFAULT.getFloats(context.reader, field, (FieldCache.FloatParser) parser, false); final float[] values = FieldCache.DEFAULT.getFloats(context.reader, field, (FieldCache.FloatParser) parser, false);
return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
@Override @Override
boolean matchDoc(int doc) { protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
} }
}; };
@ -434,7 +434,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
// ignore deleted docs if range doesn't contain 0 // ignore deleted docs if range doesn't contain 0
return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
@Override @Override
boolean matchDoc(int doc) { protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
} }
}; };
@ -497,122 +497,4 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
/** Returns the current numeric parser ({@code null} for {@code T} is {@code String}} */ /** Returns the current numeric parser ({@code null} for {@code T} is {@code String}} */
public FieldCache.Parser getParser() { return parser; } public FieldCache.Parser getParser() { return parser; }
static abstract class FieldCacheDocIdSet extends DocIdSet {
private final int maxDoc;
private final Bits acceptDocs;
FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) {
this.maxDoc = maxDoc;
this.acceptDocs = acceptDocs;
}
/**
* this method checks, if a doc is a hit, should throw AIOBE, when position
* invalid
*/
abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
/**
* this DocIdSet is always cacheable (does not go back
* to the reader for iteration)
*/
@Override
public boolean isCacheable() {
return true;
}
@Override
public Bits bits() {
return (acceptDocs == null) ? new Bits() {
public boolean get(int docid) {
return FieldCacheDocIdSet.this.matchDoc(docid);
}
public int length() {
return FieldCacheDocIdSet.this.maxDoc;
}
} : new Bits() {
public boolean get(int docid) {
return acceptDocs.get(docid) && FieldCacheDocIdSet.this.matchDoc(docid);
}
public int length() {
return FieldCacheDocIdSet.this.maxDoc;
}
};
}
@Override
public DocIdSetIterator iterator() throws IOException {
if (acceptDocs == null) {
// Specialization optimization disregard deletions
return new DocIdSetIterator() {
private int doc = -1;
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() {
try {
do {
doc++;
} while (!matchDoc(doc));
return doc;
} catch (ArrayIndexOutOfBoundsException e) {
return doc = NO_MORE_DOCS;
}
}
@Override
public int advance(int target) {
try {
doc = target;
while (!matchDoc(doc)) {
doc++;
}
return doc;
} catch (ArrayIndexOutOfBoundsException e) {
return doc = NO_MORE_DOCS;
}
}
};
} else {
// Must consult acceptDocs
// a DocIdSetIterator generating docIds by
// incrementing a variable & checking acceptDocs -
return new DocIdSetIterator() {
private int doc = -1;
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() {
do {
doc++;
if (doc >= maxDoc) {
return doc = NO_MORE_DOCS;
}
} while (!acceptDocs.get(doc) || !matchDoc(doc));
return doc;
}
@Override
public int advance(int target) {
for(doc=target;doc<maxDoc;doc++) {
if (acceptDocs.get(doc) && matchDoc(doc)) {
return doc;
}
}
return doc = NO_MORE_DOCS;
}
};
}
}
}
} }

View File

@ -127,10 +127,9 @@ public class FieldCacheTermsFilter extends Filter {
bits.set(termNumber); bits.set(termNumber);
} }
} }
final int maxDoc = context.reader.maxDoc(); return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) {
@Override @Override
boolean matchDoc(int doc) { protected final boolean matchDoc(int doc) {
return bits.get(fcsi.getOrd(doc)); return bits.get(fcsi.getOrd(doc));
} }
}; };

View File

@ -19,7 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCacheRangeFilter.FieldCacheDocIdSet;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchAllBits; import org.apache.lucene.util.Bits.MatchAllBits;
import org.apache.lucene.util.Bits.MatchNoBits; import org.apache.lucene.util.Bits.MatchNoBits;
@ -67,14 +66,9 @@ public class FieldValueFilter extends Filter {
if (docsWithField instanceof MatchAllBits) { if (docsWithField instanceof MatchAllBits) {
return null; return null;
} }
final int maxDoc = context.reader.maxDoc(); return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
@Override @Override
final boolean matchDoc(int doc) { protected final boolean matchDoc(int doc) {
if (doc >= maxDoc) {
// TODO: this makes no sense we should check this on the caller level
throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
}
return !docsWithField.get(doc); return !docsWithField.get(doc);
} }
}; };
@ -87,14 +81,9 @@ public class FieldValueFilter extends Filter {
// :-) // :-)
return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs); return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs);
} }
final int maxDoc = context.reader.maxDoc(); return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
@Override @Override
final boolean matchDoc(int doc) { protected final boolean matchDoc(int doc) {
if (doc >= maxDoc) {
// TODO: this makes no sense we should check this on the caller level
throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
}
return docsWithField.get(doc); return docsWithField.get(doc);
} }
}; };
@ -131,7 +120,7 @@ public class FieldValueFilter extends Filter {
@Override @Override
public String toString() { public String toString() {
return "NoFieldValueFilter [field=" + field + ", negate=" + negate + "]"; return "FieldValueFilter [field=" + field + ", negate=" + negate + "]";
} }
} }

View File

@ -137,10 +137,9 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
return DocIdSet.EMPTY_DOCIDSET; return DocIdSet.EMPTY_DOCIDSET;
} }
final int maxDoc = context.reader.maxDoc(); return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) {
@Override @Override
boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException { protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
return termSet.get(fcsi.getOrd(doc)); return termSet.get(fcsi.getOrd(doc));
} }
}; };