mirror of https://github.com/apache/lucene.git
LUCENE-3295: fix several issues in BitVector.writeClearedDgaps
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1144942 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
31602b986b
commit
807dad7038
|
@ -213,7 +213,6 @@ public class MemoryCodec extends Codec {
|
||||||
System.out.println(" " + Integer.toHexString(finalBuffer[i]&0xFF));
|
System.out.println(" " + Integer.toHexString(finalBuffer[i]&0xFF));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
builder.add(text, new BytesRef(spare));
|
builder.add(text, new BytesRef(spare));
|
||||||
termCount++;
|
termCount++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -162,6 +162,7 @@ public final class BitVector implements Cloneable, Bits {
|
||||||
}
|
}
|
||||||
count = c;
|
count = c;
|
||||||
}
|
}
|
||||||
|
assert count <= size: "count=" + count + " size=" + size;
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -227,6 +228,7 @@ public final class BitVector implements Cloneable, Bits {
|
||||||
} else {
|
} else {
|
||||||
writeBits(output);
|
writeBits(output);
|
||||||
}
|
}
|
||||||
|
assert verifyCount();
|
||||||
} finally {
|
} finally {
|
||||||
output.close();
|
output.close();
|
||||||
}
|
}
|
||||||
|
@ -278,14 +280,13 @@ public final class BitVector implements Cloneable, Bits {
|
||||||
output.writeInt(count()); // write count
|
output.writeInt(count()); // write count
|
||||||
int last=0;
|
int last=0;
|
||||||
int numCleared = size()-count();
|
int numCleared = size()-count();
|
||||||
int m = bits.length;
|
for (int i=0; i<bits.length && numCleared>0; i++) {
|
||||||
for (int i=0; i<m && numCleared>0; i++) {
|
if (bits[i] != (byte) 0xff) {
|
||||||
if (bits[i]!=0xff) {
|
|
||||||
output.writeVInt(i-last);
|
output.writeVInt(i-last);
|
||||||
output.writeByte(bits[i]);
|
output.writeByte(bits[i]);
|
||||||
last = i;
|
last = i;
|
||||||
numCleared -= (8-BYTE_COUNTS[bits[i] & 0xFF]);
|
numCleared -= (8-BYTE_COUNTS[bits[i] & 0xFF]);
|
||||||
assert numCleared >= 0;
|
assert numCleared >= 0 || (i == (bits.length-1) && numCleared == -(8-(size&7)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -319,7 +320,7 @@ public final class BitVector implements Cloneable, Bits {
|
||||||
final int bytesPerSetBit = expectedDGapBytes + 1;
|
final int bytesPerSetBit = expectedDGapBytes + 1;
|
||||||
|
|
||||||
// note: adding 32 because we start with ((int) -1) to indicate d-gaps format.
|
// note: adding 32 because we start with ((int) -1) to indicate d-gaps format.
|
||||||
final long expectedBits = 32 + 8 * bytesPerSetBit * count();
|
final long expectedBits = 32 + 8 * bytesPerSetBit * clearedCount;
|
||||||
|
|
||||||
// note: factor is for read/write of byte-arrays being faster than vints.
|
// note: factor is for read/write of byte-arrays being faster than vints.
|
||||||
final long factor = 10;
|
final long factor = 10;
|
||||||
|
@ -352,11 +353,21 @@ public final class BitVector implements Cloneable, Bits {
|
||||||
} else {
|
} else {
|
||||||
readBits(input);
|
readBits(input);
|
||||||
}
|
}
|
||||||
|
assert verifyCount();
|
||||||
} finally {
|
} finally {
|
||||||
input.close();
|
input.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// asserts only
|
||||||
|
private boolean verifyCount() {
|
||||||
|
assert count != -1;
|
||||||
|
final int countSav = count;
|
||||||
|
count = -1;
|
||||||
|
assert countSav == count(): "saved count was " + countSav + " but recomputed count is " + count;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/** Read as a bit set */
|
/** Read as a bit set */
|
||||||
private void readBits(IndexInput input) throws IOException {
|
private void readBits(IndexInput input) throws IOException {
|
||||||
count = input.readInt(); // read count
|
count = input.readInt(); // read count
|
||||||
|
@ -368,7 +379,7 @@ public final class BitVector implements Cloneable, Bits {
|
||||||
private void readSetDgaps(IndexInput input) throws IOException {
|
private void readSetDgaps(IndexInput input) throws IOException {
|
||||||
size = input.readInt(); // (re)read size
|
size = input.readInt(); // (re)read size
|
||||||
count = input.readInt(); // read count
|
count = input.readInt(); // read count
|
||||||
bits = new byte[(size >> 3) + 1]; // allocate bits
|
bits = new byte[getNumBytes(size)]; // allocate bits
|
||||||
int last=0;
|
int last=0;
|
||||||
int n = count();
|
int n = count();
|
||||||
while (n>0) {
|
while (n>0) {
|
||||||
|
@ -383,7 +394,7 @@ public final class BitVector implements Cloneable, Bits {
|
||||||
private void readClearedDgaps(IndexInput input) throws IOException {
|
private void readClearedDgaps(IndexInput input) throws IOException {
|
||||||
size = input.readInt(); // (re)read size
|
size = input.readInt(); // (re)read size
|
||||||
count = input.readInt(); // read count
|
count = input.readInt(); // read count
|
||||||
bits = new byte[(size >> 3) + 1]; // allocate bits
|
bits = new byte[getNumBytes(size)]; // allocate bits
|
||||||
Arrays.fill(bits, (byte) 0xff);
|
Arrays.fill(bits, (byte) 0xff);
|
||||||
clearUnusedBits();
|
clearUnusedBits();
|
||||||
int last=0;
|
int last=0;
|
||||||
|
@ -392,7 +403,7 @@ public final class BitVector implements Cloneable, Bits {
|
||||||
last += input.readVInt();
|
last += input.readVInt();
|
||||||
bits[last] = input.readByte();
|
bits[last] = input.readByte();
|
||||||
numCleared -= 8-BYTE_COUNTS[bits[last] & 0xFF];
|
numCleared -= 8-BYTE_COUNTS[bits[last] & 0xFF];
|
||||||
assert numCleared >= 0;
|
assert numCleared >= 0 || (last == (bits.length-1) && numCleared == -(8-(size&7)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
@ -153,7 +153,7 @@ public class TestBitVector extends LuceneTestCase
|
||||||
assertTrue(doCompare(bv,compare));
|
assertTrue(doCompare(bv,compare));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test r/w when size/count cause switching between bit-set and d-gaps file formats.
|
* Test r/w when size/count cause switching between bit-set and d-gaps file formats.
|
||||||
*/
|
*/
|
||||||
|
@ -165,6 +165,26 @@ public class TestBitVector extends LuceneTestCase
|
||||||
doTestDgaps(10000,40,43);
|
doTestDgaps(10000,40,43);
|
||||||
doTestDgaps(100000,415,418);
|
doTestDgaps(100000,415,418);
|
||||||
doTestDgaps(1000000,3123,3126);
|
doTestDgaps(1000000,3123,3126);
|
||||||
|
// now exercise skipping of fully populated bytes in the bitset (they are omitted if the bitset is sparse)
|
||||||
|
MockDirectoryWrapper d = new MockDirectoryWrapper(random, new RAMDirectory());
|
||||||
|
d.setPreventDoubleWrite(false);
|
||||||
|
BitVector bv = new BitVector(10000);
|
||||||
|
bv.set(0);
|
||||||
|
for (int i = 8; i < 16; i++) {
|
||||||
|
bv.set(i);
|
||||||
|
} // make sure we have one byte full of set bits
|
||||||
|
for (int i = 32; i < 40; i++) {
|
||||||
|
bv.set(i);
|
||||||
|
} // get a second byte full of set bits
|
||||||
|
// add some more bits here
|
||||||
|
for (int i = 40; i < 10000; i++) {
|
||||||
|
if (random.nextInt(1000) == 0) {
|
||||||
|
bv.set(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bv.write(d, "TESTBV", newIOContext(random));
|
||||||
|
BitVector compare = new BitVector(d, "TESTBV", newIOContext(random));
|
||||||
|
assertTrue(doCompare(bv,compare));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doTestDgaps(int size, int count1, int count2) throws IOException {
|
private void doTestDgaps(int size, int count1, int count2) throws IOException {
|
||||||
|
@ -183,7 +203,7 @@ public class TestBitVector extends LuceneTestCase
|
||||||
assertTrue(doCompare(bv,bv2));
|
assertTrue(doCompare(bv,bv2));
|
||||||
bv = bv2;
|
bv = bv2;
|
||||||
bv.clear(i);
|
bv.clear(i);
|
||||||
assertEquals(i+1,size-bv.count());
|
assertEquals(i+1, size-bv.count());
|
||||||
bv.write(d, "TESTBV", newIOContext(random));
|
bv.write(d, "TESTBV", newIOContext(random));
|
||||||
}
|
}
|
||||||
// now start decreasing number of set bits
|
// now start decreasing number of set bits
|
||||||
|
@ -196,6 +216,54 @@ public class TestBitVector extends LuceneTestCase
|
||||||
bv.write(d, "TESTBV", newIOContext(random));
|
bv.write(d, "TESTBV", newIOContext(random));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSparseWrite() throws IOException {
|
||||||
|
Directory d = newDirectory();
|
||||||
|
final int numBits = 10240;
|
||||||
|
BitVector bv = new BitVector(numBits);
|
||||||
|
bv.invertAll();
|
||||||
|
int numToClear = random.nextInt(5);
|
||||||
|
for(int i=0;i<numToClear;i++) {
|
||||||
|
bv.clear(random.nextInt(numBits));
|
||||||
|
}
|
||||||
|
bv.write(d, "test", newIOContext(random));
|
||||||
|
final long size = d.fileLength("test");
|
||||||
|
assertTrue("size=" + size, size < 100);
|
||||||
|
d.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testClearedBitNearEnd() throws IOException {
|
||||||
|
Directory d = newDirectory();
|
||||||
|
final int numBits = _TestUtil.nextInt(random, 7, 1000);
|
||||||
|
BitVector bv = new BitVector(numBits);
|
||||||
|
bv.invertAll();
|
||||||
|
bv.clear(numBits-_TestUtil.nextInt(random, 1, 7));
|
||||||
|
bv.write(d, "test", newIOContext(random));
|
||||||
|
assertEquals(numBits-1, bv.count());
|
||||||
|
d.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMostlySet() throws IOException {
|
||||||
|
Directory d = newDirectory();
|
||||||
|
final int numBits = _TestUtil.nextInt(random, 30, 1000);
|
||||||
|
for(int numClear=0;numClear<20;numClear++) {
|
||||||
|
BitVector bv = new BitVector(numBits);
|
||||||
|
bv.invertAll();
|
||||||
|
int count = 0;
|
||||||
|
while(count < numClear) {
|
||||||
|
final int bit = random.nextInt(numBits);
|
||||||
|
// Don't use getAndClear, so that count is recomputed
|
||||||
|
if (bv.get(bit)) {
|
||||||
|
bv.clear(bit);
|
||||||
|
count++;
|
||||||
|
assertEquals(numBits-count, bv.count());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
d.close();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compare two BitVectors.
|
* Compare two BitVectors.
|
||||||
* This should really be an equals method on the BitVector itself.
|
* This should really be an equals method on the BitVector itself.
|
||||||
|
@ -211,6 +279,7 @@ public class TestBitVector extends LuceneTestCase
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
assertEquals(bv.count(), compare.count());
|
||||||
return equal;
|
return equal;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue