From 807dad7038e47636e23a4e035a3ea0250663619b Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 10 Jul 2011 19:25:11 +0000 Subject: [PATCH] LUCENE-3295: fix several issues in BitVector.writeClearedDgaps git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1144942 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/codecs/memory/MemoryCodec.java | 1 - .../org/apache/lucene/util/BitVector.java | 27 +++++-- .../org/apache/lucene/util/TestBitVector.java | 75 ++++++++++++++++++- 3 files changed, 91 insertions(+), 12 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java index 6d8d7a4b740..0c6f51493c8 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java @@ -213,7 +213,6 @@ public class MemoryCodec extends Codec { System.out.println(" " + Integer.toHexString(finalBuffer[i]&0xFF)); } } - builder.add(text, new BytesRef(spare)); termCount++; } diff --git a/lucene/src/java/org/apache/lucene/util/BitVector.java b/lucene/src/java/org/apache/lucene/util/BitVector.java index a9f76fe5866..ba82cbcd20c 100644 --- a/lucene/src/java/org/apache/lucene/util/BitVector.java +++ b/lucene/src/java/org/apache/lucene/util/BitVector.java @@ -162,6 +162,7 @@ public final class BitVector implements Cloneable, Bits { } count = c; } + assert count <= size: "count=" + count + " size=" + size; return count; } @@ -227,6 +228,7 @@ public final class BitVector implements Cloneable, Bits { } else { writeBits(output); } + assert verifyCount(); } finally { output.close(); } @@ -278,14 +280,13 @@ public final class BitVector implements Cloneable, Bits { output.writeInt(count()); // write count int last=0; int numCleared = size()-count(); - int m = bits.length; - for (int i=0; i<m && numCleared>0; i++) { - if (bits[i]!=0xff) { + for (int i=0; i<bits.length && numCleared>0; i++) { + if (bits[i] != (byte) 0xff) { 
output.writeVInt(i-last); output.writeByte(bits[i]); last = i; numCleared -= (8-BYTE_COUNTS[bits[i] & 0xFF]); - assert numCleared >= 0; + assert numCleared >= 0 || (i == (bits.length-1) && numCleared == -(8-(size&7))); } } } @@ -319,7 +320,7 @@ public final class BitVector implements Cloneable, Bits { final int bytesPerSetBit = expectedDGapBytes + 1; // note: adding 32 because we start with ((int) -1) to indicate d-gaps format. - final long expectedBits = 32 + 8 * bytesPerSetBit * count(); + final long expectedBits = 32 + 8 * bytesPerSetBit * clearedCount; // note: factor is for read/write of byte-arrays being faster than vints. final long factor = 10; @@ -352,11 +353,21 @@ public final class BitVector implements Cloneable, Bits { } else { readBits(input); } + assert verifyCount(); } finally { input.close(); } } + // asserts only + private boolean verifyCount() { + assert count != -1; + final int countSav = count; + count = -1; + assert countSav == count(): "saved count was " + countSav + " but recomputed count is " + count; + return true; + } + /** Read as a bit set */ private void readBits(IndexInput input) throws IOException { count = input.readInt(); // read count @@ -368,7 +379,7 @@ public final class BitVector implements Cloneable, Bits { private void readSetDgaps(IndexInput input) throws IOException { size = input.readInt(); // (re)read size count = input.readInt(); // read count - bits = new byte[(size >> 3) + 1]; // allocate bits + bits = new byte[getNumBytes(size)]; // allocate bits int last=0; int n = count(); while (n>0) { @@ -383,7 +394,7 @@ public final class BitVector implements Cloneable, Bits { private void readClearedDgaps(IndexInput input) throws IOException { size = input.readInt(); // (re)read size count = input.readInt(); // read count - bits = new byte[(size >> 3) + 1]; // allocate bits + bits = new byte[getNumBytes(size)]; // allocate bits Arrays.fill(bits, (byte) 0xff); clearUnusedBits(); int last=0; @@ -392,7 +403,7 @@ public final class 
BitVector implements Cloneable, Bits { last += input.readVInt(); bits[last] = input.readByte(); numCleared -= 8-BYTE_COUNTS[bits[last] & 0xFF]; - assert numCleared >= 0; + assert numCleared >= 0 || (last == (bits.length-1) && numCleared == -(8-(size&7))); } } } diff --git a/lucene/src/test/org/apache/lucene/util/TestBitVector.java b/lucene/src/test/org/apache/lucene/util/TestBitVector.java index 1944cc0a854..d45f58cb3b0 100644 --- a/lucene/src/test/org/apache/lucene/util/TestBitVector.java +++ b/lucene/src/test/org/apache/lucene/util/TestBitVector.java @@ -19,7 +19,7 @@ package org.apache.lucene.util; import java.io.IOException; -import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; @@ -153,7 +153,7 @@ public class TestBitVector extends LuceneTestCase assertTrue(doCompare(bv,compare)); } } - + /** * Test r/w when size/count cause switching between bit-set and d-gaps file formats. 
*/ @@ -165,6 +165,26 @@ public class TestBitVector extends LuceneTestCase doTestDgaps(10000,40,43); doTestDgaps(100000,415,418); doTestDgaps(1000000,3123,3126); + // now exercise skipping of fully populated byte in the bitset (they are omitted if bitset is sparse) + MockDirectoryWrapper d = new MockDirectoryWrapper(random, new RAMDirectory()); + d.setPreventDoubleWrite(false); + BitVector bv = new BitVector(10000); + bv.set(0); + for (int i = 8; i < 16; i++) { + bv.set(i); + } // make sure we have once byte full of set bits + for (int i = 32; i < 40; i++) { + bv.set(i); + } // get a second byte full of set bits + // add some more bits here + for (int i = 40; i < 10000; i++) { + if (random.nextInt(1000) == 0) { + bv.set(i); + } + } + bv.write(d, "TESTBV", newIOContext(random)); + BitVector compare = new BitVector(d, "TESTBV", newIOContext(random)); + assertTrue(doCompare(bv,compare)); } private void doTestDgaps(int size, int count1, int count2) throws IOException { @@ -183,7 +203,7 @@ public class TestBitVector extends LuceneTestCase assertTrue(doCompare(bv,bv2)); bv = bv2; bv.clear(i); - assertEquals(i+1,size-bv.count()); + assertEquals(i+1, size-bv.count()); bv.write(d, "TESTBV", newIOContext(random)); } // now start decreasing number of set bits @@ -196,6 +216,54 @@ public class TestBitVector extends LuceneTestCase bv.write(d, "TESTBV", newIOContext(random)); } } + + public void testSparseWrite() throws IOException { + Directory d = newDirectory(); + final int numBits = 10240; + BitVector bv = new BitVector(numBits); + bv.invertAll(); + int numToClear = random.nextInt(5); + for(int i=0;i