From 24830b7f18146b38078a80bc04f041011ab8689e Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 16 Mar 2016 15:34:40 +0100 Subject: [PATCH] LUCENE-7106: Add helpers to compute aggregated stats on points. --- .../org/apache/lucene/index/PointValues.java | 90 ++++++++++++++++++- .../apache/lucene/index/TestPointValues.java | 62 +++++++++++-- 2 files changed, 145 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java index 1fb26544302..a4fd3239851 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PointValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/PointValues.java @@ -23,10 +23,10 @@ import java.net.InetAddress; import org.apache.lucene.document.BinaryPoint; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.bkd.BKDWriter; /** @@ -86,6 +86,94 @@ public abstract class PointValues { /** Maximum number of dimensions */ public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS; + /** Return the cumulated number of points across all leaves of the given + * {@link IndexReader}. + * @see PointValues#size(String) */ + public static long size(IndexReader reader, String field) throws IOException { + long size = 0; + for (LeafReaderContext ctx : reader.leaves()) { + PointValues values = ctx.reader().getPointValues(); + if (values != null) { + size += values.size(field); + } + } + return size; + } + + /** Return the cumulated number of docs that have points across all leaves + * of the given {@link IndexReader}. + * @see PointValues#getDocCount(String) */ + public static int getDocCount(IndexReader reader, String field) throws IOException { + int count = 0; + for (LeafReaderContext ctx : reader.leaves()) { + PointValues values = ctx.reader().getPointValues(); + if (values != null) { + count += values.getDocCount(field); + } + } + return count; + } + + /** Return the minimum packed values across all leaves of the given + * {@link IndexReader}. + * @see PointValues#getMinPackedValue(String) */ + public static byte[] getMinPackedValue(IndexReader reader, String field) throws IOException { + byte[] minValue = null; + for (LeafReaderContext ctx : reader.leaves()) { + PointValues values = ctx.reader().getPointValues(); + if (values == null) { + continue; + } + byte[] leafMinValue = values.getMinPackedValue(field); + if (leafMinValue == null) { + continue; + } + if (minValue == null) { + minValue = leafMinValue.clone(); + } else { + final int numDimensions = values.getNumDimensions(field); + final int numBytesPerDimension = values.getBytesPerDimension(field); + for (int i = 0; i < numDimensions; ++i) { + int offset = i * numBytesPerDimension; + if (StringHelper.compare(numBytesPerDimension, leafMinValue, offset, minValue, offset) < 0) { + System.arraycopy(leafMinValue, offset, minValue, offset, numBytesPerDimension); + } + } + } + } + return minValue; + } + + /** Return the maximum packed values across all leaves of the given + * {@link IndexReader}. + * @see PointValues#getMaxPackedValue(String) */ + public static byte[] getMaxPackedValue(IndexReader reader, String field) throws IOException { + byte[] maxValue = null; + for (LeafReaderContext ctx : reader.leaves()) { + PointValues values = ctx.reader().getPointValues(); + if (values == null) { + continue; + } + byte[] leafMaxValue = values.getMaxPackedValue(field); + if (leafMaxValue == null) { + continue; + } + if (maxValue == null) { + maxValue = leafMaxValue.clone(); + } else { + final int numDimensions = values.getNumDimensions(field); + final int numBytesPerDimension = values.getBytesPerDimension(field); + for (int i = 0; i < numDimensions; ++i) { + int offset = i * numBytesPerDimension; + if (StringHelper.compare(numBytesPerDimension, leafMaxValue, offset, maxValue, offset) > 0) { + System.arraycopy(leafMaxValue, offset, maxValue, offset, numBytesPerDimension); + } + } + } + } + return maxValue; + } + /** Default constructor */ protected PointValues() { } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java index 49cbc2a6d7b..c7ca2dcb651 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java @@ -22,12 +22,6 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.FilterCodec; -import org.apache.lucene.codecs.PointsFormat; -import org.apache.lucene.codecs.PointsReader; -import org.apache.lucene.codecs.PointsWriter; -import org.apache.lucene.codecs.lucene60.Lucene60PointsReader; -import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter; import org.apache.lucene.document.BinaryPoint; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; @@ -657,4 +651,60 @@ public class TestPointValues extends LuceneTestCase { assertTrue(output.toString(IOUtils.UTF_8).contains("test: points...")); dir.close(); } + + public void testMergedStats() throws IOException { + final int iters = atLeast(3); + for (int iter = 0; iter < iters; ++iter) { + doTestMergedStats(); + } + } + + private static byte[][] randomBinaryValue(int numDims, int numBytesPerDim) { + byte[][] bytes = new byte[numDims][]; + for (int i = 0; i < numDims; ++i) { + bytes[i] = new byte[numBytesPerDim]; + random().nextBytes(bytes[i]); + } + return bytes; + } + + private void doTestMergedStats() throws IOException { + final int numDims = TestUtil.nextInt(random(), 1, 8); + final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16); + Directory dir = new RAMDirectory(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null)); + final int numDocs = TestUtil.nextInt(random(), 10, 20); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + final int numPoints = random().nextInt(3); + for (int j = 0; j < numPoints; ++j) { + doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim))); + } + w.addDocument(doc); + if (random().nextBoolean()) { + DirectoryReader.open(w).close(); + } + } + + final IndexReader reader1 = DirectoryReader.open(w); + w.forceMerge(1); + final IndexReader reader2 = DirectoryReader.open(w); + final PointValues expected = getOnlyLeafReader(reader2).getPointValues(); + if (expected == null) { + assertNull(PointValues.getMinPackedValue(reader1, "field")); + assertNull(PointValues.getMaxPackedValue(reader1, "field")); + assertEquals(0, PointValues.getDocCount(reader1, "field")); + assertEquals(0, PointValues.size(reader1, "field")); + } else { + assertArrayEquals( + expected.getMinPackedValue("field"), + PointValues.getMinPackedValue(reader1, "field")); + assertArrayEquals( + expected.getMaxPackedValue("field"), + PointValues.getMaxPackedValue(reader1, "field")); + assertEquals(expected.getDocCount("field"), PointValues.getDocCount(reader1, "field")); + assertEquals(expected.size("field"), PointValues.size(reader1, "field")); + } + IOUtils.close(w, reader1, reader2, dir); + } }