From b5475d10e1b5ef3c07389f36bbe72c9cd5f962d5 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 2 Mar 2016 18:39:57 -0500 Subject: [PATCH] also add PointValues.getDocCount stat, and check it in CheckIndex --- .../codecs/simpletext/SimpleTextBKDReader.java | 4 ++-- .../simpletext/SimpleTextPointReader.java | 18 +++++++++++++++++- .../simpletext/SimpleTextPointWriter.java | 18 ++++++++++++------ .../org/apache/lucene/codecs/PointFormat.java | 5 +++++ .../org/apache/lucene/codecs/PointWriter.java | 4 ++++ .../codecs/lucene60/Lucene60PointReader.java | 11 +++++++++++ .../codecs/lucene60/Lucene60PointWriter.java | 6 ++++-- .../org/apache/lucene/index/CheckIndex.java | 13 ++++++++++++- .../lucene/index/ParallelLeafReader.java | 13 +++++++++++++ .../org/apache/lucene/index/PointValues.java | 3 ++- .../apache/lucene/index/PointValuesWriter.java | 5 +++++ .../lucene/index/SlowCodecReaderWrapper.java | 5 +++++ .../org/apache/lucene/util/bkd/BKDReader.java | 9 ++++++++- .../org/apache/lucene/util/bkd/BKDWriter.java | 16 ++++++++++++---- .../org/apache/lucene/util/bkd/TestBKD.java | 14 +++++++------- .../codecs/asserting/AssertingPointFormat.java | 6 ++++++ .../codecs/cranky/CrankyPointFormat.java | 5 +++++ 17 files changed, 130 insertions(+), 25 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java index 6752393da73..09c40ec36fd 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java @@ -34,8 +34,8 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.BLOCK_VA class SimpleTextBKDReader extends BKDReader { public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, - byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException { - super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue, pointCount); + byte[] minPackedValue, byte[] maxPackedValue, long pointCount, int docCount) throws IOException { + super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue, pointCount, docCount); } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointReader.java index 76c0431b0d0..05afd93c7d8 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointReader.java @@ -39,6 +39,7 @@ import org.apache.lucene.util.bkd.BKDReader; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.BLOCK_FP; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.BYTES_PER_DIM; +import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.DOC_COUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.FIELD_COUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.FIELD_FP; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.FIELD_FP_NAME; @@ -124,6 +125,10 @@ class SimpleTextPointReader extends PointReader { readLine(dataIn); assert startsWith(POINT_COUNT); long pointCount = parseLong(POINT_COUNT); + + readLine(dataIn); + assert startsWith(DOC_COUNT); + int docCount = parseInt(DOC_COUNT); long[] leafBlockFPs = new long[count]; for(int i=0;i 0) { + FixedBitSet docsSeen = new FixedBitSet(reader.maxDoc()); status.totalValueFields++; int dimCount = fieldInfo.getPointDimensionCount(); int bytesPerDim = fieldInfo.getPointNumBytes(); @@ -1709,6 +1710,12 @@ public final class CheckIndex implements Closeable { byte[] globalMinPackedValue = values.getMinPackedValue(fieldInfo.name); long size = values.size(fieldInfo.name); + int docCount = values.getDocCount(fieldInfo.name); + + if (docCount > size) { + throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points and inconsistent docCount=" + docCount); + } + if (globalMinPackedValue == null) { if (size != 0) { throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size); @@ -1739,6 +1746,7 @@ public final class CheckIndex implements Closeable { public void visit(int docID, byte[] packedValue) { checkPackedValue("packed value", packedValue, docID); pointCountSeen[0]++; + docsSeen.set(docID); for(int dim=0;dim { - new BKDWriter(dir, "bkd", 1, 16, 1000000, 0.001); + new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001); }); assertTrue(expected.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode")); } @@ -631,7 +631,7 @@ public class TestBKD extends LuceneTestCase { List docIDBases = null; int seg = 0; - BKDWriter w = new BKDWriter(dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB); + BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB); IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT); IndexInput in = null; @@ -685,7 +685,7 @@ public class TestBKD extends LuceneTestCase { seg++; maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000); maxMB = (float) 3.0 + (3*random().nextDouble()); - w = new BKDWriter(dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB); + w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB); lastDocIDBase = docID; } } @@ -701,7 +701,7 @@ public class TestBKD extends LuceneTestCase { out.close(); in = dir.openInput("bkd", IOContext.DEFAULT); seg++; - w = new BKDWriter(dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB); + w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB); List readers = new ArrayList<>(); for(long fp : toMerge) { in.seek(fp); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointFormat.java index 71790fc4ba6..892eeef6852 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointFormat.java @@ -221,6 +221,12 @@ public final class AssertingPointFormat extends PointFormat { // TODO: what to assert? return in.size(fieldName); } + + @Override + public int getDocCount(String fieldName) { + // TODO: what to assert? + return in.getDocCount(fieldName); + } } static class AssertingPointWriter extends PointWriter { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPointFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPointFormat.java index 699fb3c7cbd..b663a809e21 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPointFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPointFormat.java @@ -176,5 +176,10 @@ class CrankyPointFormat extends PointFormat { public long size(String fieldName) { return delegate.size(fieldName); } + + @Override + public int getDocCount(String fieldName) { + return delegate.getDocCount(fieldName); + } } }