LUCENE-5100: BaseDocIdSetTestCase.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1502448 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2013-07-12 07:13:47 +00:00
parent 5a42052fba
commit d24f6c2f5f
8 changed files with 290 additions and 209 deletions

View File

@ -33,7 +33,7 @@ public class EliasFanoDocIdSet extends DocIdSet {
* @param numValues The number of values that can be encoded.
* @param upperBound At least the highest value that will be encoded.
*/
public EliasFanoDocIdSet(long numValues, long upperBound) {
public EliasFanoDocIdSet(int numValues, int upperBound) {
efEncoder = new EliasFanoEncoder(numValues, upperBound);
}

View File

@ -119,7 +119,7 @@ public class EliasFanoEncoder {
if ((numValues > 0L) && (upperBound < 0L)) {
throw new IllegalArgumentException("upperBound should not be negative: " + upperBound + " when numValues > 0");
}
this.upperBound = upperBound;
this.upperBound = numValues > 0 ? upperBound : -1L; // if there is no value, -1 is the best upper bound
int nLowBits = 0;
if (this.numValues > 0) { // nLowBits = max(0; floor(2log(upperBound/numValues)))
long lowBitsFac = this.upperBound / this.numValues;

View File

@ -0,0 +1,30 @@
package org.apache.lucene.util;
import java.io.IOException;
import java.util.BitSet;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestDocIdBitSet extends BaseDocIdSetTestCase<DocIdBitSet> {
@Override
public DocIdBitSet copyOf(BitSet bs, int length) throws IOException {
return new DocIdBitSet((BitSet) bs.clone());
}
}

View File

@ -22,7 +22,16 @@ import java.util.BitSet;
import org.apache.lucene.search.DocIdSetIterator;
public class TestFixedBitSet extends LuceneTestCase {
public class TestFixedBitSet extends BaseDocIdSetTestCase<FixedBitSet> {
@Override
public FixedBitSet copyOf(BitSet bs, int length) throws IOException {
final FixedBitSet set = new FixedBitSet(length);
for (int doc = bs.nextSetBit(0); doc != -1; doc = bs.nextSetBit(doc + 1)) {
set.set(doc);
}
return set;
}
void doGet(BitSet a, FixedBitSet b) {
int max = b.length();

View File

@ -17,11 +17,21 @@
package org.apache.lucene.util;
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.search.DocIdSetIterator;
public class TestOpenBitSet extends LuceneTestCase {
public class TestOpenBitSet extends BaseDocIdSetTestCase<OpenBitSet> {
@Override
public OpenBitSet copyOf(BitSet bs, int length) throws IOException {
final OpenBitSet set = new OpenBitSet(length);
for (int doc = bs.nextSetBit(0); doc != -1; doc = bs.nextSetBit(doc + 1)) {
set.set(doc);
}
return set;
}
void doGet(BitSet a, OpenBitSet b) {
int max = a.size();
@ -320,6 +330,7 @@ public class TestOpenBitSet extends LuceneTestCase {
checkPrevSetBitArray(new int[] {0});
checkPrevSetBitArray(new int[] {0,2});
}
}

View File

@ -19,93 +19,44 @@ package org.apache.lucene.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
public class TestWAH8DocIdSet extends BaseDocIdSetTestCase<WAH8DocIdSet> {
public class TestWAH8DocIdSet extends LuceneTestCase {
private static FixedBitSet randomSet(int numBits, int numBitsSet) {
assert numBitsSet <= numBits;
final FixedBitSet set = new FixedBitSet(numBits);
if (numBitsSet == numBits) {
set.set(0, set.length());
} else {
for (int i = 0; i < numBitsSet; ++i) {
while (true) {
final int o = random().nextInt(numBits);
if (!set.get(o)) {
set.set(o);
break;
}
}
}
@Override
public WAH8DocIdSet copyOf(BitSet bs, int length) throws IOException {
final int indexInterval = _TestUtil.nextInt(random(), 8, 256);
final WAH8DocIdSet.Builder builder = new WAH8DocIdSet.Builder().setIndexInterval(indexInterval);
for (int i = bs.nextSetBit(0); i != -1; i = bs.nextSetBit(i + 1)) {
builder.add(i);
}
return set;
return builder.build();
}
private static FixedBitSet randomSet(int numBits, float percentSet) {
return randomSet(numBits, (int) (percentSet * numBits));
}
public void testAgainstFixedBitSet() throws IOException {
final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
for (float percentSet : new float[] {0f, 0.0001f, random().nextFloat() / 2, 0.9f, 1f}) {
final FixedBitSet set = randomSet(numBits, percentSet);
final WAH8DocIdSet copy = WAH8DocIdSet.copyOf(set.iterator());
assertEquals(numBits, set, copy);
}
}
public void assertEquals(int numBits, FixedBitSet ds1, WAH8DocIdSet ds2) throws IOException {
@Override
public void assertEquals(int numBits, BitSet ds1, WAH8DocIdSet ds2)
throws IOException {
super.assertEquals(numBits, ds1, ds2);
assertEquals(ds1.cardinality(), ds2.cardinality());
// nextDoc
DocIdSetIterator it1 = ds1.iterator();
DocIdSetIterator it2 = ds2.iterator();
assertEquals(it1.docID(), it2.docID());
for (int doc = it1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it1.nextDoc()) {
assertEquals(doc, it2.nextDoc());
assertEquals(it1.docID(), it2.docID());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it2.nextDoc());
assertEquals(it1.docID(), it2.docID());
// nextDoc / advance
it1 = ds1.iterator();
it2 = ds2.iterator();
for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;) {
if (random().nextBoolean()) {
doc = it1.nextDoc();
assertEquals(doc, it2.nextDoc());
assertEquals(it1.docID(), it2.docID());
} else {
final int target = doc + 1 + random().nextInt(random().nextBoolean() ? 64 : numBits / 64);
doc = it1.advance(target);
assertEquals(doc, it2.advance(target));
assertEquals(it1.docID(), it2.docID());
}
}
}
public void testUnion() throws IOException {
final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
final int numDocIdSets = _TestUtil.nextInt(random(), 0, 4);
final List<FixedBitSet> fixedSets = new ArrayList<FixedBitSet>(numDocIdSets);
final List<BitSet> fixedSets = new ArrayList<BitSet>(numDocIdSets);
for (int i = 0; i < numDocIdSets; ++i) {
fixedSets.add(randomSet(numBits, random().nextFloat() / 16));
}
final List<WAH8DocIdSet> compressedSets = new ArrayList<WAH8DocIdSet>(numDocIdSets);
for (FixedBitSet set : fixedSets) {
compressedSets.add(WAH8DocIdSet.copyOf(set.iterator()));
for (BitSet set : fixedSets) {
compressedSets.add(copyOf(set, numBits));
}
final WAH8DocIdSet union = WAH8DocIdSet.union(compressedSets);
final FixedBitSet expected = new FixedBitSet(numBits);
for (DocIdSet set : fixedSets) {
final DocIdSetIterator it = set.iterator();
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
final BitSet expected = new BitSet(numBits);
for (BitSet set : fixedSets) {
for (int doc = set.nextSetBit(0); doc != -1; doc = set.nextSetBit(doc + 1)) {
expected.set(doc);
}
}
@ -115,27 +66,26 @@ public class TestWAH8DocIdSet extends LuceneTestCase {
public void testIntersection() throws IOException {
final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
final int numDocIdSets = _TestUtil.nextInt(random(), 1, 4);
final List<FixedBitSet> fixedSets = new ArrayList<FixedBitSet>(numDocIdSets);
final List<BitSet> fixedSets = new ArrayList<BitSet>(numDocIdSets);
for (int i = 0; i < numDocIdSets; ++i) {
fixedSets.add(randomSet(numBits, random().nextFloat()));
}
final List<WAH8DocIdSet> compressedSets = new ArrayList<WAH8DocIdSet>(numDocIdSets);
for (FixedBitSet set : fixedSets) {
compressedSets.add(WAH8DocIdSet.copyOf(set.iterator()));
for (BitSet set : fixedSets) {
compressedSets.add(copyOf(set, numBits));
}
final WAH8DocIdSet union = WAH8DocIdSet.intersect(compressedSets);
final FixedBitSet expected = new FixedBitSet(numBits);
expected.set(0, expected.length());
for (DocIdSet set : fixedSets) {
final DocIdSetIterator it = set.iterator();
int lastDoc = -1;
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
expected.clear(lastDoc + 1, doc);
lastDoc = doc;
}
if (lastDoc + 1 < expected.length()) {
expected.clear(lastDoc + 1, expected.length());
final BitSet expected = new BitSet(numBits);
expected.set(0, expected.size());
for (BitSet set : fixedSets) {
for (int previousDoc = -1, doc = set.nextSetBit(0); ; previousDoc = doc, doc = set.nextSetBit(doc + 1)) {
if (doc == -1) {
expected.clear(previousDoc + 1, set.size());
break;
} else {
expected.clear(previousDoc + 1, doc);
}
}
}
assertEquals(numBits, expected, union);

View File

@ -18,139 +18,45 @@ package org.apache.lucene.util.packed;
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BaseDocIdSetTestCase;
public class TestEliasFanoDocIdSet extends LuceneTestCase {
private static DocIdSetIterator makeDisi(final int[] docIds) {
class IntArrayDisi extends DocIdSetIterator {
int i = 0;
int docId = -1;
public class TestEliasFanoDocIdSet extends BaseDocIdSetTestCase<EliasFanoDocIdSet> {
@Override
public EliasFanoDocIdSet copyOf(final BitSet bs, final int numBits) throws IOException {
final EliasFanoDocIdSet set = new EliasFanoDocIdSet(bs.cardinality(), numBits - 1);
set.encodeFromDisi(new DocIdSetIterator() {
int doc = -1;
@Override
public int nextDoc() throws IOException {
doc = bs.nextSetBit(doc + 1);
if (doc == -1) {
doc = NO_MORE_DOCS;
}
assert doc < numBits;
return doc;
}
@Override
public int docID() {
return docId;
return doc;
}
@Override
public int nextDoc() {
if (i >= docIds.length) {
docId = NO_MORE_DOCS;
return docId;
}
if (docIds[i] < docId) { // Elias-Fano sequence should be non decreasing.
// The non decreasing condition for Elias-Fano is weaker than normal increasing for DocIdSetIterator
throw new AssertionError("docIds[] out of order");
}
docId = docIds[i++]; // increase i to just after current
return docId;
}
@Override
public int advance(int target) {
// ( ((i == 0) and (docId == -1)) or
// ((i > 0) and (docIds.length > 0) and (i <= docIds.length) and (docId == docIds[i-1])) )
// The behavior of this method is undefined when called with target current, or after the iterator has exhausted.
// Both cases may result in unpredicted behavior, and may throw an assertion error or an IOOBE here.
// So when nextDoc() or advance() were called earlier, the target should be bigger than current docId:
assert (docId == -1) || (docId < target);
// Do a binary search for the index j for which:
// ((j >= i)
// and ((j < docIds.length) implies (docIds[j] >= target))
// and ((j >= 1) implies (docIds[j-1] < target)) )
int j = docIds.length;
while (i < j) {
// ((0 <= i) and (i < j) and (j <= docIds.length)) so (docIds.length > 0)
int m = i + (j - i) / 2; // (i <= m) and (m < j); avoid overflow for (i + j)
if (docIds[m] < target) {
i = m + 1; // (docIds[i-1] < target) and (i <= j)
} else {
j = m; // (docIds[j] >= target) and (i <= j)
}
} // (i == j)
docId = (i >= docIds.length)
? NO_MORE_DOCS // exhausted
: docIds[i++]; // increase i to just after current
return docId;
}
@Override
public long cost() {
return docIds.length;
return bs.cardinality();
}
};
return new IntArrayDisi();
}
public void tstEqualDisisNext(DocIdSetIterator disi0, DocIdSetIterator disi1) throws IOException {
assertEquals(disi0.docID(), disi1.docID());
int d0 = disi0.nextDoc();
int d1 = disi1.nextDoc();
int i = 0;
while ((d0 != DocIdSetIterator.NO_MORE_DOCS) && (d1 != DocIdSetIterator.NO_MORE_DOCS)) {
assertEquals("index " + i, d0, d1);
i++;
d0 = disi0.nextDoc();
d1 = disi1.nextDoc();
}
assertEquals("at end", d0, d1);
}
public void tstEqualDisisAdvanceAsNext(DocIdSetIterator disi0, DocIdSetIterator disi1) throws IOException {
assertEquals(disi0.docID(), disi1.docID());
int d0 = disi0.advance(0);
int d1 = disi1.advance(0);
int i = 0;
while ((d0 != DocIdSetIterator.NO_MORE_DOCS) && (d1 != DocIdSetIterator.NO_MORE_DOCS)) {
assertEquals("index " + i, d0, d1);
i++;
d0 = disi0.advance(d1+1);
d1 = disi1.advance(d1+1);
}
assertEquals("at end disi0 " + disi0 + ", disi1 " + disi1, d0, d1);
}
public void tstEF(int[] docIds) {
int maxDoc = -1;
for (int docId: docIds) {
assert docId >= maxDoc; // non decreasing
maxDoc = docId;
}
try {
EliasFanoDocIdSet efd = new EliasFanoDocIdSet(docIds.length, maxDoc);
efd.encodeFromDisi(makeDisi(docIds));
tstEqualDisisNext( makeDisi(docIds), efd.iterator());
tstEqualDisisAdvanceAsNext(makeDisi(docIds), efd.iterator());
} catch (IOException ioe) {
throw new Error(ioe);
}
}
public void testEmpty() { tstEF(new int[] {}); }
public void testOneElementZero() { tstEF(new int[] {0}); }
public void testTwoElements() { tstEF(new int[] {0,1}); }
public void testOneElementOneBit() {
for (int i = 0; i < (Integer.SIZE-1); i++) {
tstEF(new int[] {1 << i});
}
}
public void testIncreasingSequences() {
final int TEST_NUMDOCS = 129;
int[] docIds = new int[TEST_NUMDOCS];
for (int f = 1; f <= 1025; f++) {
for (int i = 0; i < TEST_NUMDOCS; i++) {
docIds[i] = i*f;
@Override
public int advance(int target) throws IOException {
return slowAdvance(target);
}
tstEF(docIds);
}
});
return set;
}
}
}

View File

@ -0,0 +1,175 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
/** Base test class for {@link DocIdSet}s. */
public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTestCase {
/** Create a copy of the given {@link BitSet} which has <code>length</code> bits. */
public abstract T copyOf(BitSet bs, int length) throws IOException;
/** Create a random set which has <code>numBitsSet</code> of its <code>numBits</code> bits set. */
protected static BitSet randomSet(int numBits, int numBitsSet) {
assert numBitsSet <= numBits;
final BitSet set = new BitSet(numBits);
if (numBitsSet == numBits) {
set.set(0, numBits);
} else {
for (int i = 0; i < numBitsSet; ++i) {
while (true) {
final int o = random().nextInt(numBits);
if (!set.get(o)) {
set.set(o);
break;
}
}
}
}
return set;
}
/** Same as {@link #randomSet(int, int)} but given a load factor. */
protected static BitSet randomSet(int numBits, float percentSet) {
return randomSet(numBits, (int) (percentSet * numBits));
}
/** Test length=0. */
public void testNoBit() throws IOException {
final BitSet bs = new BitSet(1);
final T copy = copyOf(bs, 0);
assertEquals(0, bs, copy);
}
/** Test length=1. */
public void test1Bit() throws IOException {
final BitSet bs = new BitSet(1);
if (random().nextBoolean()) {
bs.set(0);
}
final T copy = copyOf(bs, 1);
assertEquals(1, bs, copy);
}
/** Test length=2. */
public void test2Bits() throws IOException {
final BitSet bs = new BitSet(2);
if (random().nextBoolean()) {
bs.set(0);
}
if (random().nextBoolean()) {
bs.set(1);
}
final T copy = copyOf(bs, 2);
assertEquals(2, bs, copy);
}
/** Compare the content of the set against a {@link BitSet}. */
public void testAgainstBitSet() throws IOException {
final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
// test various random sets with various load factors
for (float percentSet : new float[] {0f, 0.0001f, random().nextFloat() / 2, 0.9f, 1f}) {
final BitSet set = randomSet(numBits, percentSet);
final T copy = copyOf(set, numBits);
assertEquals(numBits, set, copy);
}
// test one doc
BitSet set = new BitSet(numBits);
set.set(0); // 0 first
T copy = copyOf(set, numBits);
assertEquals(numBits, set, copy);
set.clear(0);
set.set(random().nextInt(numBits));
copy = copyOf(set, numBits); // then random index
assertEquals(numBits, set, copy);
// test regular increments
for (int inc = 2; inc < 1000; inc += _TestUtil.nextInt(random(), 1, 100)) {
set = new BitSet(numBits);
for (int d = random().nextInt(10); d < numBits; d += inc) {
set.set(d);
}
copy = copyOf(set, numBits);
assertEquals(numBits, set, copy);
}
}
/** Assert that the content of the {@link DocIdSet} is the same as the content of the {@link BitSet}. */
public void assertEquals(int numBits, BitSet ds1, T ds2) throws IOException {
// nextDoc
DocIdSetIterator it2 = ds2.iterator();
if (it2 == null) {
assertEquals(-1, ds1.nextSetBit(0));
} else {
assertEquals(-1, it2.docID());
for (int doc = ds1.nextSetBit(0); doc != -1; doc = ds1.nextSetBit(doc + 1)) {
assertEquals(doc, it2.nextDoc());
assertEquals(doc, it2.docID());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it2.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it2.docID());
}
// nextDoc / advance
it2 = ds2.iterator();
if (it2 == null) {
assertEquals(-1, ds1.nextSetBit(0));
} else {
for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;) {
if (random().nextBoolean()) {
doc = ds1.nextSetBit(doc + 1);
if (doc == -1) {
doc = DocIdSetIterator.NO_MORE_DOCS;
}
assertEquals(doc, it2.nextDoc());
assertEquals(doc, it2.docID());
} else {
final int target = doc + 1 + random().nextInt(random().nextBoolean() ? 64 : Math.max(numBits / 8, 1));
doc = ds1.nextSetBit(target);
if (doc == -1) {
doc = DocIdSetIterator.NO_MORE_DOCS;
}
assertEquals(doc, it2.advance(target));
assertEquals(doc, it2.docID());
}
}
}
// bits()
final Bits bits = ds2.bits();
if (bits != null) {
// test consistency between bits and iterator
it2 = ds2.iterator();
for (int previousDoc = -1, doc = it2.nextDoc(); ; previousDoc = doc, doc = it2.nextDoc()) {
final int max = doc == DocIdSetIterator.NO_MORE_DOCS ? bits.length() : doc;
for (int i = previousDoc + 1; i < max; ++i) {
assertEquals(false, bits.get(i));
}
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
assertEquals(true, bits.get(doc));
}
}
}
}