From 9db0ddc22f08670aef7f2b5a5cff724d78115b46 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 26 Feb 2016 14:54:42 -0500 Subject: [PATCH] detect when the visitor is being re-used e.g. by SlowCompositeReaderWrapper --- .../apache/lucene/search/PointInSetQuery.java | 30 +++++++++++++++---- .../apache/lucene/search/PointRangeQuery.java | 2 ++ .../apache/lucene/util/LuceneTestCase.java | 18 +++++++++++ 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index e695cf79789..a9d8dfc711a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -40,7 +40,9 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.StringHelper; /** Finds all documents whose point value, previously indexed with e.g. {@link org.apache.lucene.document.LongPoint}, is contained in the - * specified set */ + * specified set. + * + * @lucene.experimental */ public class PointInSetQuery extends Query { // A little bit overkill for us, since all of our "terms" are always in the same field: @@ -124,7 +126,7 @@ public class PointInSetQuery extends Query { if (numDims == 1) { // We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set: - values.intersect(field, new MergePointVisitor(sortedPackedPoints.iterator(), hitCount, result)); + values.intersect(field, new MergePointVisitor(sortedPackedPoints, hitCount, result)); } else { // NOTE: this is naive implementation, where for each point we re-walk the KD tree to intersect. We could instead do a similar @@ -150,16 +152,24 @@ public class PointInSetQuery extends Query { private final DocIdSetBuilder result; private final int[] hitCount; - private final TermIterator iterator; + private TermIterator iterator; private BytesRef nextQueryPoint; + private final byte[] lastMaxPackedValue; private final BytesRef scratch = new BytesRef(); + private final PrefixCodedTerms sortedPackedPoints; - public MergePointVisitor(TermIterator iterator, int[] hitCount, DocIdSetBuilder result) throws IOException { + public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, int[] hitCount, DocIdSetBuilder result) throws IOException { this.hitCount = hitCount; this.result = result; - this.iterator = iterator; - nextQueryPoint = iterator.next(); + this.sortedPackedPoints = sortedPackedPoints; + lastMaxPackedValue = new byte[bytesPerDim]; scratch.length = bytesPerDim; + resetIterator(); + } + + private void resetIterator() { + this.iterator = sortedPackedPoints.iterator(); + nextQueryPoint = iterator.next(); } @Override @@ -195,6 +205,14 @@ public class PointInSetQuery extends Query { @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + + // NOTE: this is messy ... we need it in cases where a single vistor (us) is shared across multiple leaf readers + // (e.g. SlowCompositeReaderWrapper), in which case we need to reset our iterator to re-start the merge sort. Maybe we should instead + // add an explicit .start() to IntersectVisitor, and clarify the semantics that in the 1D case all cells will be visited in order? + if (StringHelper.compare(bytesPerDim, lastMaxPackedValue, 0, minPackedValue, 0) > 0) { + resetIterator(); + } + System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, bytesPerDim); while (nextQueryPoint != null) { scratch.bytes = minPackedValue; diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index be52e61209b..804483de7c7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -51,6 +51,8 @@ import org.apache.lucene.util.StringHelper; * @see FloatPoint * @see DoublePoint * @see BinaryPoint + * + * @lucene.experimental */ public abstract class PointRangeQuery extends Query { final String field; diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 7cfc18d98ce..462e73ef31a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -1632,10 +1632,16 @@ public abstract class LuceneTestCase extends Assert { for (int i = 0, c = random.nextInt(6)+1; i < c; i++) { switch(random.nextInt(6)) { case 0: + if (VERBOSE) { + System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with SlowCompositeReaderWrapper.wrap"); + } r = SlowCompositeReaderWrapper.wrap(r); break; case 1: // will create no FC insanity in atomic case, as ParallelLeafReader has own cache key: + if (VERBOSE) { + System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with ParallelLeaf/CompositeReader"); + } r = (r instanceof LeafReader) ? new ParallelLeafReader((LeafReader) r) : new ParallelCompositeReader((CompositeReader) r); @@ -1644,6 +1650,9 @@ public abstract class LuceneTestCase extends Assert { // Häckidy-Hick-Hack: a standard MultiReader will cause FC insanity, so we use // QueryUtils' reader with a fake cache key, so insanity checker cannot walk // along our reader: + if (VERBOSE) { + System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with FCInvisibleMultiReader"); + } r = new FCInvisibleMultiReader(r); break; case 3: @@ -1656,6 +1665,9 @@ public abstract class LuceneTestCase extends Assert { final int end = allFields.isEmpty() ? 0 : random.nextInt(allFields.size()); final Set fields = new HashSet<>(allFields.subList(0, end)); // will create no FC insanity as ParallelLeafReader has own cache key: + if (VERBOSE) { + System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with ParallelLeafReader(SlowCompositeReaderWapper)"); + } r = new ParallelLeafReader( new FieldFilterLeafReader(ar, fields, false), new FieldFilterLeafReader(ar, fields, true) @@ -1665,6 +1677,9 @@ public abstract class LuceneTestCase extends Assert { // Häckidy-Hick-Hack: a standard Reader will cause FC insanity, so we use // QueryUtils' reader with a fake cache key, so insanity checker cannot walk // along our reader: + if (VERBOSE) { + System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with AssertingLeaf/DirectoryReader"); + } if (r instanceof LeafReader) { r = new AssertingLeafReader((LeafReader)r); } else if (r instanceof DirectoryReader) { @@ -1672,6 +1687,9 @@ public abstract class LuceneTestCase extends Assert { } break; case 5: + if (VERBOSE) { + System.out.println("NOTE: LuceneTestCase.wrapReader: wrapping previous reader=" + r + " with MismatchedLeaf/DirectoryReader"); + } if (r instanceof LeafReader) { r = new MismatchedLeafReader((LeafReader)r, random); } else if (r instanceof DirectoryReader) {