LUCENE-7051: Remove the "estimate match count" optimization from point queries.

Adrien Grand 2016-02-29 10:04:17 +01:00
parent 5f9db01833
commit 46d05afdae
5 changed files with 9 additions and 48 deletions
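
Background on the change: each point query used to thread an int[1] counter through its
IntersectVisitor and pass the total to DocIdSetBuilder.build(long) as a cost hint for the
resulting iterator. A multi-valued document is visited once per matching value, so the
counter over-estimates the number of distinct hits; the queries now call the plain build()
and let the DocIdSet report its own cost. A minimal sketch of the over-count (class name
hypothetical; assumes build() dedups, which both the bit-set and sorted-buffer paths do):

    import java.io.IOException;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.DocIdSetBuilder;

    public class HitCountOverEstimate {
      public static void main(String[] args) throws IOException {
        DocIdSetBuilder result = new DocIdSetBuilder(10); // normally reader.maxDoc()
        int[] hitCount = new int[1];
        // Doc 7 is multi-valued: three of its points match, so visit(docID) fires three times.
        for (int i = 0; i < 3; i++) {
          hitCount[0]++;
          result.add(7);
        }
        DocIdSet set = result.build(); // the removed overload was build(hitCount[0])
        int distinct = 0;
        DocIdSetIterator it = set.iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
          distinct++;
        }
        System.out.println(hitCount[0] + " visits, " + distinct + " matching doc"); // 3 visits, 1 matching doc
      }
    }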

PointInSetQuery.java

@@ -19,7 +19,6 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Objects;
 import org.apache.lucene.document.BinaryPoint;
 import org.apache.lucene.document.DoublePoint;
@@ -27,7 +26,6 @@ import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
@@ -39,8 +37,6 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.DocIdSetBuilder;
-import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;

 /**
@@ -138,18 +134,16 @@ public abstract class PointInSetQuery extends Query {
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        int[] hitCount = new int[1];
         if (numDims == 1) {
           // We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
-          values.intersect(field, new MergePointVisitor(sortedPackedPoints, hitCount, result));
+          values.intersect(field, new MergePointVisitor(sortedPackedPoints, result));
         } else {
           // NOTE: this is naive implementation, where for each point we re-walk the KD tree to intersect. We could instead do a similar
           // optimization as the 1D case, but I think it'd mean building a query-time KD tree so we could efficiently intersect against the
           // index, which is probably tricky!
-          SinglePointVisitor visitor = new SinglePointVisitor(hitCount, result);
+          SinglePointVisitor visitor = new SinglePointVisitor(result);
           TermIterator iterator = sortedPackedPoints.iterator();
           for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
             visitor.setPoint(point);
@@ -157,8 +151,7 @@ public abstract class PointInSetQuery extends Query {
           }
         }
-        // NOTE: hitCount[0] will be over-estimate in multi-valued case
-        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
+        return new ConstantScoreScorer(this, score(), result.build().iterator());
       }
     };
   }
@@ -168,15 +161,13 @@ public abstract class PointInSetQuery extends Query {
   private class MergePointVisitor implements IntersectVisitor {
     private final DocIdSetBuilder result;
-    private final int[] hitCount;
     private TermIterator iterator;
     private BytesRef nextQueryPoint;
     private final byte[] lastMaxPackedValue;
     private final BytesRef scratch = new BytesRef();
     private final PrefixCodedTerms sortedPackedPoints;
-    public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, int[] hitCount, DocIdSetBuilder result) throws IOException {
-      this.hitCount = hitCount;
+    public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result) throws IOException {
       this.result = result;
       this.sortedPackedPoints = sortedPackedPoints;
       lastMaxPackedValue = new byte[bytesPerDim];
@@ -196,7 +187,6 @@ public abstract class PointInSetQuery extends Query {
     @Override
     public void visit(int docID) {
-      hitCount[0]++;
       result.add(docID);
     }
@@ -207,7 +197,6 @@ public abstract class PointInSetQuery extends Query {
         int cmp = nextQueryPoint.compareTo(scratch);
         if (cmp == 0) {
           // Query point equals index point, so collect and return
-          hitCount[0]++;
           result.add(docID);
           break;
         } else if (cmp < 0) {
@@ -264,11 +253,9 @@ public abstract class PointInSetQuery extends Query {
   private class SinglePointVisitor implements IntersectVisitor {
     private final DocIdSetBuilder result;
-    private final int[] hitCount;
     private final byte[] pointBytes;
-    public SinglePointVisitor(int[] hitCount, DocIdSetBuilder result) {
-      this.hitCount = hitCount;
+    public SinglePointVisitor(DocIdSetBuilder result) {
       this.result = result;
       this.pointBytes = new byte[bytesPerDim * numDims];
     }
@@ -286,7 +273,6 @@ public abstract class PointInSetQuery extends Query {
     @Override
     public void visit(int docID) {
-      hitCount[0]++;
       result.add(docID);
     }
@@ -295,7 +281,6 @@ public abstract class PointInSetQuery extends Query {
       assert packedValue.length == pointBytes.length;
       if (Arrays.equals(packedValue, pointBytes)) {
         // The point for this doc matches the point we are querying on
-        hitCount[0]++;
         result.add(docID);
       }
     }
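
The comment in the 1D branch above describes a classic merge: the queried set (a sorted
PrefixCodedTerms) and the indexed values both arrive in sorted order, so MergePointVisitor
only ever advances whichever side is behind. A self-contained sketch of that merge step,
independent of the Lucene API (the class, method, and comparison helper are illustration
only):

    import java.util.ArrayList;
    import java.util.List;

    class MergeIntersectSketch {
      /** Unsigned lexicographic comparison of two equal-length byte[] values. */
      static int compareUnsigned(byte[] a, byte[] b) {
        for (int i = 0; i < a.length; i++) {
          int cmp = (a[i] & 0xFF) - (b[i] & 0xFF);
          if (cmp != 0) {
            return cmp;
          }
        }
        return 0;
      }

      /** Collects values present in both sorted lists, advancing whichever side is behind. */
      static List<byte[]> intersectSorted(List<byte[]> queryPoints, List<byte[]> indexValues) {
        List<byte[]> hits = new ArrayList<>();
        int q = 0, v = 0;
        while (q < queryPoints.size() && v < indexValues.size()) {
          int cmp = compareUnsigned(queryPoints.get(q), indexValues.get(v));
          if (cmp == 0) {
            hits.add(indexValues.get(v));
            v++; // keep the query point: later index values (multi-valued docs) may equal it too
          } else if (cmp < 0) {
            q++; // query point is behind the current index value
          } else {
            v++; // index value is behind the current query point
          }
        }
        return hits;
      }
    }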

PointRangeQuery.java

@@ -221,7 +221,6 @@ public abstract class PointRangeQuery extends Query {
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        int[] hitCount = new int[1];
         values.intersect(field,
             new IntersectVisitor() {
@@ -232,7 +231,6 @@ public abstract class PointRangeQuery extends Query {
               @Override
               public void visit(int docID) {
-                hitCount[0]++;
                 result.add(docID);
               }
@@ -251,7 +249,6 @@ public abstract class PointRangeQuery extends Query {
                 }
                 // Doc is in-bounds
-                hitCount[0]++;
                 result.add(docID);
               }
@@ -280,8 +277,7 @@ public abstract class PointRangeQuery extends Query {
               }
             });
-        // NOTE: hitCount[0] will be over-estimate in multi-valued case
-        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
+        return new ConstantScoreScorer(this, score(), result.build().iterator());
       }
     };
   }
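
For reference, the "Doc is in-bounds" decision that survives above boils down to an unsigned
per-dimension byte comparison of the packed value against the packed lower and upper bounds.
A hedged sketch, not the PointRangeQuery code itself (it assumes the usual layout of numDims
contiguous blocks of bytesPerDim bytes):

    class PackedBoundsCheck {
      /** True if packedValue lies within [lowerPoint, upperPoint] in every dimension. */
      static boolean matches(byte[] packedValue, byte[] lowerPoint, byte[] upperPoint,
                             int numDims, int bytesPerDim) {
        for (int dim = 0; dim < numDims; dim++) {
          int offset = dim * bytesPerDim;
          if (compareUnsigned(packedValue, lowerPoint, offset, bytesPerDim) < 0) {
            return false; // below the lower bound in this dimension
          }
          if (compareUnsigned(packedValue, upperPoint, offset, bytesPerDim) > 0) {
            return false; // above the upper bound in this dimension
          }
        }
        return true; // doc is in-bounds
      }

      /** Unsigned lexicographic comparison of len bytes starting at offset. */
      static int compareUnsigned(byte[] a, byte[] b, int offset, int len) {
        for (int i = offset; i < offset + len; i++) {
          int cmp = (a[i] & 0xFF) - (b[i] & 0xFF);
          if (cmp != 0) {
            return cmp;
          }
        }
        return 0;
      }
    }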

DocIdSetBuilder.java

@@ -169,21 +169,9 @@ public final class DocIdSetBuilder {
    * Build a {@link DocIdSet} from the accumulated doc IDs.
    */
   public DocIdSet build() {
-    return build(-1);
-  }
-
-  /**
-   * Expert: build a {@link DocIdSet} with a hint on the cost that the resulting
-   * {@link DocIdSet} would have.
-   */
-  public DocIdSet build(long costHint) {
     try {
       if (bitSet != null) {
-        if (costHint == -1) {
-          return new BitDocIdSet(bitSet);
-        } else {
-          return new BitDocIdSet(bitSet, costHint);
-        }
+        return new BitDocIdSet(bitSet);
       } else {
         LSBRadixSorter sorter = new LSBRadixSorter();
         sorter.sort(buffer, 0, bufferSize);
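
With the costHint overload gone, the cost estimate lives in the DocIdSet implementations
themselves: the sorted-buffer path knows its exact length after deduplication, and the
one-argument BitDocIdSet constructor derives a cost from the bit set itself (its approximate
cardinality, if memory serves). Callers that still want the estimate read it off the
iterator; a small hedged usage sketch (class and method names are illustration only):

    import java.io.IOException;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.DocIdSetBuilder;

    class BuildCostExample {
      /** maxDoc would normally come from reader.maxDoc(). */
      static long matchCostEstimate(int maxDoc) throws IOException {
        DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
        builder.add(3);
        builder.add(9);
        DocIdSetIterator it = builder.build().iterator();
        return it.cost(); // the estimate formerly supplied via the caller's hitCount hint
      }
    }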

PointInPolygonQuery.java

@@ -111,12 +111,10 @@ public class PointInPolygonQuery extends Query {
         }
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        int[] hitCount = new int[1];
         values.intersect(field,
             new IntersectVisitor() {
               @Override
               public void visit(int docID) {
-                hitCount[0]++;
                 result.add(docID);
               }
@@ -126,7 +124,6 @@ public class PointInPolygonQuery extends Query {
                 double lat = LatLonPoint.decodeLat(NumericUtils.bytesToInt(packedValue, 0));
                 double lon = LatLonPoint.decodeLon(NumericUtils.bytesToInt(packedValue, Integer.BYTES));
                 if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
-                  hitCount[0]++;
                   result.add(docID);
                 }
               }
@@ -155,8 +152,7 @@ public class PointInPolygonQuery extends Query {
               }
             });
-        // NOTE: hitCount[0] will be over-estimate in multi-valued case
-        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
+        return new ConstantScoreScorer(this, score(), result.build().iterator());
       }
     };
   }
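
GeoRelationUtils.pointInPolygon above is, in essence, the even-odd (ray casting) test. A
generic sketch of that algorithm for the same parallel lon/lat arrays; the real utility may
differ in details such as boundary handling:

    class RayCastingSketch {
      /** Even-odd rule: toggle on every polygon edge crossed by a ray cast from the point. */
      static boolean pointInPolygon(double[] polyLons, double[] polyLats, double lat, double lon) {
        boolean inside = false;
        for (int i = 0, j = polyLats.length - 1; i < polyLats.length; j = i++) {
          if ((polyLats[i] > lat) != (polyLats[j] > lat)
              && lon < (polyLons[j] - polyLons[i]) * (lat - polyLats[i]) / (polyLats[j] - polyLats[i]) + polyLons[i]) {
            inside = !inside; // this edge crosses the ray
          }
        }
        return inside;
      }
    }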

PointInGeo3DShapeQuery.java

@@ -92,14 +92,12 @@ class PointInGeo3DShapeQuery extends Query {
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        int[] hitCount = new int[1];
         values.intersect(field,
             new IntersectVisitor() {
               @Override
               public void visit(int docID) {
                 result.add(docID);
-                hitCount[0]++;
               }
               @Override
@@ -110,7 +108,6 @@ class PointInGeo3DShapeQuery extends Query {
                 double z = Geo3DPoint.decodeDimension(planetModel, packedValue, 2 * Integer.BYTES);
                 if (shape.isWithin(x, y, z)) {
                   result.add(docID);
-                  hitCount[0]++;
                 }
               }
@@ -159,8 +156,7 @@ class PointInGeo3DShapeQuery extends Query {
               }
             });
-        // NOTE: hitCount[0] will be over-estimate in multi-valued case
-        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
+        return new ConstantScoreScorer(this, score(), result.build().iterator());
       }
     };
   }