mirror of https://github.com/apache/lucene.git
LUCENE-3295: fix several issues in BitVector.writeClearedDgaps
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1144942 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
31602b986b
commit
807dad7038
|
@ -213,7 +213,6 @@ public class MemoryCodec extends Codec {
|
|||
System.out.println(" " + Integer.toHexString(finalBuffer[i]&0xFF));
|
||||
}
|
||||
}
|
||||
|
||||
builder.add(text, new BytesRef(spare));
|
||||
termCount++;
|
||||
}
|
||||
|
|
|
@ -162,6 +162,7 @@ public final class BitVector implements Cloneable, Bits {
|
|||
}
|
||||
count = c;
|
||||
}
|
||||
assert count <= size: "count=" + count + " size=" + size;
|
||||
return count;
|
||||
}
|
||||
|
||||
|
@ -227,6 +228,7 @@ public final class BitVector implements Cloneable, Bits {
|
|||
} else {
|
||||
writeBits(output);
|
||||
}
|
||||
assert verifyCount();
|
||||
} finally {
|
||||
output.close();
|
||||
}
|
||||
|
@ -278,14 +280,13 @@ public final class BitVector implements Cloneable, Bits {
|
|||
output.writeInt(count()); // write count
|
||||
int last=0;
|
||||
int numCleared = size()-count();
|
||||
int m = bits.length;
|
||||
for (int i=0; i<m && numCleared>0; i++) {
|
||||
if (bits[i]!=0xff) {
|
||||
for (int i=0; i<bits.length && numCleared>0; i++) {
|
||||
if (bits[i] != (byte) 0xff) {
|
||||
output.writeVInt(i-last);
|
||||
output.writeByte(bits[i]);
|
||||
last = i;
|
||||
numCleared -= (8-BYTE_COUNTS[bits[i] & 0xFF]);
|
||||
assert numCleared >= 0;
|
||||
assert numCleared >= 0 || (i == (bits.length-1) && numCleared == -(8-(size&7)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -319,7 +320,7 @@ public final class BitVector implements Cloneable, Bits {
|
|||
final int bytesPerSetBit = expectedDGapBytes + 1;
|
||||
|
||||
// note: adding 32 because we start with ((int) -1) to indicate d-gaps format.
|
||||
final long expectedBits = 32 + 8 * bytesPerSetBit * count();
|
||||
final long expectedBits = 32 + 8 * bytesPerSetBit * clearedCount;
|
||||
|
||||
// note: factor is for read/write of byte-arrays being faster than vints.
|
||||
final long factor = 10;
|
||||
|
@ -352,11 +353,21 @@ public final class BitVector implements Cloneable, Bits {
|
|||
} else {
|
||||
readBits(input);
|
||||
}
|
||||
assert verifyCount();
|
||||
} finally {
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
// asserts only
|
||||
private boolean verifyCount() {
|
||||
assert count != -1;
|
||||
final int countSav = count;
|
||||
count = -1;
|
||||
assert countSav == count(): "saved count was " + countSav + " but recomputed count is " + count;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Read as a bit set */
|
||||
private void readBits(IndexInput input) throws IOException {
|
||||
count = input.readInt(); // read count
|
||||
|
@ -368,7 +379,7 @@ public final class BitVector implements Cloneable, Bits {
|
|||
private void readSetDgaps(IndexInput input) throws IOException {
|
||||
size = input.readInt(); // (re)read size
|
||||
count = input.readInt(); // read count
|
||||
bits = new byte[(size >> 3) + 1]; // allocate bits
|
||||
bits = new byte[getNumBytes(size)]; // allocate bits
|
||||
int last=0;
|
||||
int n = count();
|
||||
while (n>0) {
|
||||
|
@ -383,7 +394,7 @@ public final class BitVector implements Cloneable, Bits {
|
|||
private void readClearedDgaps(IndexInput input) throws IOException {
|
||||
size = input.readInt(); // (re)read size
|
||||
count = input.readInt(); // read count
|
||||
bits = new byte[(size >> 3) + 1]; // allocate bits
|
||||
bits = new byte[getNumBytes(size)]; // allocate bits
|
||||
Arrays.fill(bits, (byte) 0xff);
|
||||
clearUnusedBits();
|
||||
int last=0;
|
||||
|
@ -392,7 +403,7 @@ public final class BitVector implements Cloneable, Bits {
|
|||
last += input.readVInt();
|
||||
bits[last] = input.readByte();
|
||||
numCleared -= 8-BYTE_COUNTS[bits[last] & 0xFF];
|
||||
assert numCleared >= 0;
|
||||
assert numCleared >= 0 || (last == (bits.length-1) && numCleared == -(8-(size&7)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.util;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
|
@ -153,7 +153,7 @@ public class TestBitVector extends LuceneTestCase
|
|||
assertTrue(doCompare(bv,compare));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test r/w when size/count cause switching between bit-set and d-gaps file formats.
|
||||
*/
|
||||
|
@ -165,6 +165,26 @@ public class TestBitVector extends LuceneTestCase
|
|||
doTestDgaps(10000,40,43);
|
||||
doTestDgaps(100000,415,418);
|
||||
doTestDgaps(1000000,3123,3126);
|
||||
// now exercise skipping of fully populated bytes in the bitset (they are omitted if the bitset is sparse)
|
||||
MockDirectoryWrapper d = new MockDirectoryWrapper(random, new RAMDirectory());
|
||||
d.setPreventDoubleWrite(false);
|
||||
BitVector bv = new BitVector(10000);
|
||||
bv.set(0);
|
||||
for (int i = 8; i < 16; i++) {
|
||||
bv.set(i);
|
||||
} // make sure we have one byte full of set bits
|
||||
for (int i = 32; i < 40; i++) {
|
||||
bv.set(i);
|
||||
} // get a second byte full of set bits
|
||||
// add some more bits here
|
||||
for (int i = 40; i < 10000; i++) {
|
||||
if (random.nextInt(1000) == 0) {
|
||||
bv.set(i);
|
||||
}
|
||||
}
|
||||
bv.write(d, "TESTBV", newIOContext(random));
|
||||
BitVector compare = new BitVector(d, "TESTBV", newIOContext(random));
|
||||
assertTrue(doCompare(bv,compare));
|
||||
}
|
||||
|
||||
private void doTestDgaps(int size, int count1, int count2) throws IOException {
|
||||
|
@ -183,7 +203,7 @@ public class TestBitVector extends LuceneTestCase
|
|||
assertTrue(doCompare(bv,bv2));
|
||||
bv = bv2;
|
||||
bv.clear(i);
|
||||
assertEquals(i+1,size-bv.count());
|
||||
assertEquals(i+1, size-bv.count());
|
||||
bv.write(d, "TESTBV", newIOContext(random));
|
||||
}
|
||||
// now start decreasing number of set bits
|
||||
|
@ -196,6 +216,54 @@ public class TestBitVector extends LuceneTestCase
|
|||
bv.write(d, "TESTBV", newIOContext(random));
|
||||
}
|
||||
}
|
||||
|
||||
public void testSparseWrite() throws IOException {
|
||||
Directory d = newDirectory();
|
||||
final int numBits = 10240;
|
||||
BitVector bv = new BitVector(numBits);
|
||||
bv.invertAll();
|
||||
int numToClear = random.nextInt(5);
|
||||
for(int i=0;i<numToClear;i++) {
|
||||
bv.clear(random.nextInt(numBits));
|
||||
}
|
||||
bv.write(d, "test", newIOContext(random));
|
||||
final long size = d.fileLength("test");
|
||||
assertTrue("size=" + size, size < 100);
|
||||
d.close();
|
||||
}
|
||||
|
||||
public void testClearedBitNearEnd() throws IOException {
|
||||
Directory d = newDirectory();
|
||||
final int numBits = _TestUtil.nextInt(random, 7, 1000);
|
||||
BitVector bv = new BitVector(numBits);
|
||||
bv.invertAll();
|
||||
bv.clear(numBits-_TestUtil.nextInt(random, 1, 7));
|
||||
bv.write(d, "test", newIOContext(random));
|
||||
assertEquals(numBits-1, bv.count());
|
||||
d.close();
|
||||
}
|
||||
|
||||
public void testMostlySet() throws IOException {
|
||||
Directory d = newDirectory();
|
||||
final int numBits = _TestUtil.nextInt(random, 30, 1000);
|
||||
for(int numClear=0;numClear<20;numClear++) {
|
||||
BitVector bv = new BitVector(numBits);
|
||||
bv.invertAll();
|
||||
int count = 0;
|
||||
while(count < numClear) {
|
||||
final int bit = random.nextInt(numBits);
|
||||
// Don't use getAndClear, so that count is recomputed
|
||||
if (bv.get(bit)) {
|
||||
bv.clear(bit);
|
||||
count++;
|
||||
assertEquals(numBits-count, bv.count());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
d.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare two BitVectors.
|
||||
* This should really be an equals method on the BitVector itself.
|
||||
|
@ -211,6 +279,7 @@ public class TestBitVector extends LuceneTestCase
|
|||
break;
|
||||
}
|
||||
}
|
||||
assertEquals(bv.count(), compare.count());
|
||||
return equal;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue