From f5831d86ae7af5b6e2c771f75ed167f8270acf50 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 16 Apr 2009 14:29:26 +0000 Subject: [PATCH] LUCENE-1605: add BitVector.subset git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@765649 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 3 + .../org/apache/lucene/util/BitVector.java | 27 +++++++- .../org/apache/lucene/util/TestBitVector.java | 66 +++++++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 082c6f21fca..808cd452f83 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -285,6 +285,9 @@ New features during rewrite (getTotalNumberOfTerms). FilteredTermEnum is also more friendly to subclassing. (Uwe Schindler via Mike McCandless) +22. LUCENE-1605: Added BitVector.subset(). (Jeremy Volkman via Mike + McCandless) + Optimizations 1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing diff --git a/src/java/org/apache/lucene/util/BitVector.java b/src/java/org/apache/lucene/util/BitVector.java index 3c34c67486d..a342e4d9b12 100644 --- a/src/java/org/apache/lucene/util/BitVector.java +++ b/src/java/org/apache/lucene/util/BitVector.java @@ -238,5 +238,30 @@ public final class BitVector implements Cloneable { n -= BYTE_COUNTS[bits[last] & 0xFF]; } } - + + /** + * Retrieve a subset of this BitVector. + * + * @param start + * starting index, inclusive + * @param end + * ending index, exclusive + * @return subset + */ + public BitVector subset(int start, int end) { + if (start < 0 || end > size() || end < start) + throw new IndexOutOfBoundsException(); + // Special case -- return empty vector is start == end + if (end == start) return new BitVector(0); + byte[] bits = new byte[((end - start - 1) >>> 3) + 1]; + int s = start >>> 3; + for (int i = 0; i < bits.length; i++) { + int cur = 0xFF & this.bits[i + s]; + int next = i + s + 1 >= this.bits.length ? 0 : 0xFF & this.bits[i + s + 1]; + bits[i] = (byte) ((cur >>> (start & 7)) | ((next << (8 - (start & 7))))); + } + int bitsToClear = (bits.length * 8 - (end - start)) % 8; + bits[bits.length - 1] &= ~(0xFF << (8 - bitsToClear)); + return new BitVector(bits, end - start); + } } diff --git a/src/test/org/apache/lucene/util/TestBitVector.java b/src/test/org/apache/lucene/util/TestBitVector.java index 0433e67f99d..fe27ada9fca 100644 --- a/src/test/org/apache/lucene/util/TestBitVector.java +++ b/src/test/org/apache/lucene/util/TestBitVector.java @@ -218,4 +218,70 @@ public class TestBitVector extends LuceneTestCase } return equal; } + + private static int[] subsetPattern = new int[] { 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1 }; + + /** + * Tests BitVector.subset() against the above pattern + */ + public void testSubset() { + doTestSubset(0, 0); + doTestSubset(0, 20); + doTestSubset(0, 7); + doTestSubset(0, 8); + doTestSubset(0, 9); + doTestSubset(0, 15); + doTestSubset(0, 16); + doTestSubset(0, 17); + doTestSubset(1, 7); + doTestSubset(1, 8); + doTestSubset(1, 9); + doTestSubset(1, 15); + doTestSubset(1, 16); + doTestSubset(1, 17); + doTestSubset(2, 20); + doTestSubset(3, 20); + doTestSubset(4, 20); + doTestSubset(5, 20); + doTestSubset(6, 20); + doTestSubset(7, 14); + doTestSubset(7, 15); + doTestSubset(7, 16); + doTestSubset(8, 15); + doTestSubset(9, 20); + doTestSubset(10, 20); + doTestSubset(11, 20); + doTestSubset(12, 20); + doTestSubset(13, 20); + } + + /** + * Compare a subset against the corresponding portion of the test pattern + */ + private void doTestSubset(int start, int end) { + BitVector full = createSubsetTestVector(); + BitVector subset = full.subset(start, end); + assertEquals(end - start, subset.size()); + int count = 0; + for (int i = start, j = 0; i < end; i++, j++) { + if (subsetPattern[i] == 1) { + count++; + assertTrue(subset.get(j)); + } else { + assertFalse(subset.get(j)); + } + } + assertEquals(count, subset.count()); + } + + private BitVector createSubsetTestVector() { + BitVector bv = new BitVector(subsetPattern.length); + for (int i = 0; i < subsetPattern.length; i++) { + if (subsetPattern[i] == 1) { + bv.set(i); + } + } + return bv; + } + }