detect when the visitor is being re-used, e.g. by SlowCompositeReaderWrapper

Mike McCandless 2016-02-26 14:54:42 -05:00
parent 54e7bb5f58
commit 9db0ddc22f
3 changed files with 44 additions and 6 deletions

lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java

@@ -40,7 +40,9 @@ import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
 /** Finds all documents whose point value, previously indexed with e.g. {@link org.apache.lucene.document.LongPoint}, is contained in the
- * specified set */
+ * specified set.
+ *
+ * @lucene.experimental */
 public class PointInSetQuery extends Query {
   // A little bit overkill for us, since all of our "terms" are always in the same field:
@@ -124,7 +126,7 @@ public class PointInSetQuery extends Query {
     if (numDims == 1) {
       // We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
-      values.intersect(field, new MergePointVisitor(sortedPackedPoints.iterator(), hitCount, result));
+      values.intersect(field, new MergePointVisitor(sortedPackedPoints, hitCount, result));
     } else {
       // NOTE: this is a naive implementation, where for each point we re-walk the KD tree to intersect. We could instead do a similar
@@ -150,16 +152,24 @@ public class PointInSetQuery extends Query {
     private final DocIdSetBuilder result;
     private final int[] hitCount;
-    private final TermIterator iterator;
+    private TermIterator iterator;
     private BytesRef nextQueryPoint;
+    private final byte[] lastMaxPackedValue;
     private final BytesRef scratch = new BytesRef();
+    private final PrefixCodedTerms sortedPackedPoints;
-    public MergePointVisitor(TermIterator iterator, int[] hitCount, DocIdSetBuilder result) throws IOException {
+    public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, int[] hitCount, DocIdSetBuilder result) throws IOException {
       this.hitCount = hitCount;
       this.result = result;
-      this.iterator = iterator;
-      nextQueryPoint = iterator.next();
+      this.sortedPackedPoints = sortedPackedPoints;
+      lastMaxPackedValue = new byte[bytesPerDim];
       scratch.length = bytesPerDim;
+      resetIterator();
     }
+    private void resetIterator() {
+      this.iterator = sortedPackedPoints.iterator();
+      nextQueryPoint = iterator.next();
+    }
     @Override
@@ -195,6 +205,14 @@ public class PointInSetQuery extends Query {
     @Override
     public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+      // NOTE: this is messy ... we need it in cases where a single visitor (us) is shared across multiple leaf readers
+      // (e.g. SlowCompositeReaderWrapper), in which case we need to reset our iterator to re-start the merge sort. Maybe we should instead
+      // add an explicit .start() to IntersectVisitor, and clarify the semantics that in the 1D case all cells will be visited in order?
+      if (StringHelper.compare(bytesPerDim, lastMaxPackedValue, 0, minPackedValue, 0) > 0) {
+        resetIterator();
+      }
+      System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, bytesPerDim);
       while (nextQueryPoint != null) {
         scratch.bytes = minPackedValue;
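
The heart of the fix is the compare() override above. Within a single 1D tree, cells arrive in sorted order (the comment proposes making that contract explicit via an IntersectVisitor.start()), so each cell's minPackedValue should never sort below the previous cell's maxPackedValue. When it does, the visitor is being re-used against a new tree, e.g. by SlowCompositeReaderWrapper, and the merge-sort iterator over the query points must restart. Here is a minimal standalone sketch of that detection idea; the class and method names are hypothetical, and java.util.Arrays.compareUnsigned (Java 9+) stands in for Lucene's StringHelper.compare:

import java.util.Arrays;

// Sketch only, not the commit's code: detect that a cells-in-ascending-order
// visitor has been re-used against a second tree and restart its merge state.
class ReusableMergeVisitor {
  private final int bytesPerDim;
  private final byte[] lastMaxPackedValue;  // upper bound of the previous cell
  private boolean seenCell;

  ReusableMergeVisitor(int bytesPerDim) {
    this.bytesPerDim = bytesPerDim;
    this.lastMaxPackedValue = new byte[bytesPerDim];
  }

  // Called once per cell; a single 1D tree delivers cells in ascending order.
  void onCell(byte[] minPackedValue, byte[] maxPackedValue) {
    if (seenCell && Arrays.compareUnsigned(minPackedValue, 0, bytesPerDim,
                                           lastMaxPackedValue, 0, bytesPerDim) < 0) {
      // This cell starts before the previous one ended, so we must be walking
      // a new tree: restart the merge sort from the first query point.
      resetIterator();
    }
    System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, bytesPerDim);
    seenCell = true;
    // ... merge the sorted query points against [minPackedValue, maxPackedValue] ...
  }

  private void resetIterator() {
    // In PointInSetQuery this re-creates the TermIterator from sortedPackedPoints.
  }
}

Note that the real code needs no seenCell flag: lastMaxPackedValue starts zero-filled, so the first comparison can never trigger a reset; the flag here just makes that explicit.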

lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java

@@ -51,6 +51,8 @@ import org.apache.lucene.util.StringHelper;
  * @see FloatPoint
  * @see DoublePoint
  * @see BinaryPoint
+ *
+ * @lucene.experimental
  */
 public abstract class PointRangeQuery extends Query {
   final String field;

lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java

@@ -1632,10 +1632,16 @@ public abstract class LuceneTestCase extends Assert {
     for (int i = 0, c = random.nextInt(6)+1; i < c; i++) {
       switch(random.nextInt(6)) {
         case 0:
+          if (VERBOSE) {
+            System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with SlowCompositeReaderWrapper.wrap");
+          }
           r = SlowCompositeReaderWrapper.wrap(r);
           break;
         case 1:
           // will create no FC insanity in atomic case, as ParallelLeafReader has own cache key:
+          if (VERBOSE) {
+            System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with ParallelLeaf/CompositeReader");
+          }
           r = (r instanceof LeafReader) ?
             new ParallelLeafReader((LeafReader) r) :
             new ParallelCompositeReader((CompositeReader) r);
@@ -1644,6 +1650,9 @@ public abstract class LuceneTestCase extends Assert {
           // Häckidy-Hick-Hack: a standard MultiReader will cause FC insanity, so we use
           // QueryUtils' reader with a fake cache key, so insanity checker cannot walk
           // along our reader:
+          if (VERBOSE) {
+            System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with FCInvisibleMultiReader");
+          }
           r = new FCInvisibleMultiReader(r);
           break;
         case 3:
case 3:
@@ -1656,6 +1665,9 @@ public abstract class LuceneTestCase extends Assert {
           final int end = allFields.isEmpty() ? 0 : random.nextInt(allFields.size());
           final Set<String> fields = new HashSet<>(allFields.subList(0, end));
           // will create no FC insanity as ParallelLeafReader has own cache key:
+          if (VERBOSE) {
+            System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with ParallelLeafReader(SlowCompositeReaderWrapper)");
+          }
           r = new ParallelLeafReader(
             new FieldFilterLeafReader(ar, fields, false),
             new FieldFilterLeafReader(ar, fields, true)
@@ -1665,6 +1677,9 @@ public abstract class LuceneTestCase extends Assert {
           // Häckidy-Hick-Hack: a standard Reader will cause FC insanity, so we use
           // QueryUtils' reader with a fake cache key, so insanity checker cannot walk
           // along our reader:
+          if (VERBOSE) {
+            System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with AssertingLeaf/DirectoryReader");
+          }
           if (r instanceof LeafReader) {
             r = new AssertingLeafReader((LeafReader)r);
           } else if (r instanceof DirectoryReader) {
@@ -1672,6 +1687,9 @@ public abstract class LuceneTestCase extends Assert {
           }
           break;
         case 5:
+          if (VERBOSE) {
+            System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with MismatchedLeaf/DirectoryReader");
+          }
           if (r instanceof LeafReader) {
             r = new MismatchedLeafReader((LeafReader)r, random);
           } else if (r instanceof DirectoryReader) {
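
The LuceneTestCase changes are purely diagnostic: each branch of the random wrapReader loop now prints which wrapper it applied, so the log for a failing seed reconstructs the exact reader stack, including the SlowCompositeReaderWrapper case that triggers the visitor re-use fixed above. A small sketch of the same VERBOSE-gated pattern; the standalone flag and the tests.verbose property name are assumptions standing in for LuceneTestCase's own VERBOSE:

// Sketch of VERBOSE-gated test diagnostics; the flag and property name here
// are assumptions, not LuceneTestCase's actual definition.
class TestDiagnostics {
  static final boolean VERBOSE = Boolean.getBoolean("tests.verbose");

  static void note(String msg) {
    if (VERBOSE) {                        // silent unless explicitly enabled
      System.out.println("NOTE: " + msg);
    }
  }
}

// Usage inside one randomized wrapping step:
//   TestDiagnostics.note("wrapReader: wrapping previous reader=" + r
//       + " with SlowCompositeReaderWrapper.wrap");
//   r = SlowCompositeReaderWrapper.wrap(r);

Gating on a flag rather than logging unconditionally keeps randomized runs quiet by default while still making each wrapping decision reproducible from a verbose log.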