mirror of https://github.com/apache/lucene.git
LUCENE-5983: CachingWrapperFilter now uses RoaringDocIdSet.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1629605 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1a834a153a
commit
bfc1b3deee
|
@ -206,6 +206,9 @@ Optimizations
|
|||
per-segment/per-producer, and norms and doc values merging no longer cause
|
||||
RAM spikes for latent fields. (Mike McCandless, Robert Muir)
|
||||
|
||||
* LUCENE-5983: CachingWrapperFilter now uses a new DocIdSet implementation
|
||||
called RoaringDocIdSet instead of WAH8DocIdSet. (Adrien Grand)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-5909: Smoke tester now has better command line parsing and
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.RoaringDocIdSet;
|
||||
import org.apache.lucene.util.WAH8DocIdSet;
|
||||
|
||||
/**
|
||||
|
@ -86,12 +87,10 @@ public class CachingWrapperFilter extends Filter implements Accountable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Default cache implementation: uses {@link WAH8DocIdSet}.
|
||||
* Default cache implementation: uses {@link RoaringDocIdSet}.
|
||||
*/
|
||||
protected DocIdSet cacheImpl(DocIdSetIterator iterator, LeafReader reader) throws IOException {
|
||||
WAH8DocIdSet.Builder builder = new WAH8DocIdSet.Builder();
|
||||
builder.add(iterator);
|
||||
return builder.build();
|
||||
return new RoaringDocIdSet.Builder(reader.maxDoc()).add(iterator).build();
|
||||
}
|
||||
|
||||
// for testing
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
/**
|
||||
* This {@link DocIdSet} encodes the negation of another {@link DocIdSet}.
|
||||
* It is cacheable and supports random-access if the underlying set is
|
||||
* cacheable and supports random-access.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class NotDocIdSet extends DocIdSet {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(NotDocIdSet.class);
|
||||
|
||||
private final int maxDoc;
|
||||
private final DocIdSet in;
|
||||
|
||||
/** Sole constructor. */
|
||||
public NotDocIdSet(int maxDoc, DocIdSet in) {
|
||||
this.maxDoc = maxDoc;
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCacheable() {
|
||||
return in.isCacheable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits bits() throws IOException {
|
||||
final Bits inBits = in.bits();
|
||||
if (inBits == null) {
|
||||
return null;
|
||||
}
|
||||
return new Bits() {
|
||||
|
||||
@Override
|
||||
public boolean get(int index) {
|
||||
return !inBits.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return inBits.length();
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED + in.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
final DocIdSetIterator inIterator = in.iterator();
|
||||
return new DocIdSetIterator() {
|
||||
|
||||
int doc = -1;
|
||||
int nextSkippedDoc = -1;
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (doc == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
doc = target;
|
||||
if (doc > nextSkippedDoc) {
|
||||
nextSkippedDoc = inIterator.advance(doc);
|
||||
}
|
||||
while (true) {
|
||||
if (doc >= maxDoc) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
assert doc <= nextSkippedDoc;
|
||||
if (doc != nextSkippedDoc) {
|
||||
return doc;
|
||||
}
|
||||
doc += 1;
|
||||
nextSkippedDoc = inIterator.nextDoc();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
// even if there are few docs in this set, iterating over all documents
|
||||
// costs O(maxDoc) in all cases
|
||||
return maxDoc;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,343 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
/**
|
||||
* {@link DocIdSet} implementation inspired from http://roaringbitmap.org/
|
||||
*
|
||||
* The space is divided into blocks of 2^16 bits and each block is encoded
|
||||
* independently. In each block, if less than 2^12 bits are set, then
|
||||
* documents are simply stored in a short[]. If more than 2^16-2^12 bits are
|
||||
* set, then the inverse of the set is encoded in a simple short[]. Otherwise
|
||||
* a {@link FixedBitSet} is used.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class RoaringDocIdSet extends DocIdSet {
|
||||
|
||||
// Number of documents in a block
|
||||
private static final int BLOCK_SIZE = 1 << 16;
|
||||
// The maximum length for an array, beyond that point we switch to a bitset
|
||||
private static final int MAX_ARRAY_LENGTH = 1 << 12;
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(RoaringDocIdSet.class);
|
||||
|
||||
/** A builder of {@link RoaringDocIdSet}s. */
|
||||
public static class Builder {
|
||||
|
||||
private final int maxDoc;
|
||||
private final DocIdSet[] sets;
|
||||
|
||||
private int cardinality;
|
||||
private int lastDocId;
|
||||
private int currentBlock;
|
||||
private int currentBlockCardinality;
|
||||
|
||||
// We start by filling the buffer and when it's full we copy the content of
|
||||
// the buffer to the FixedBitSet and put further documents in that bitset
|
||||
private final short[] buffer;
|
||||
private FixedBitSet denseBuffer;
|
||||
|
||||
/** Sole constructor. */
|
||||
public Builder(int maxDoc) {
|
||||
this.maxDoc = maxDoc;
|
||||
sets = new DocIdSet[(maxDoc + (1 << 16) - 1) >>> 16];
|
||||
lastDocId = -1;
|
||||
currentBlock = -1;
|
||||
buffer = new short[MAX_ARRAY_LENGTH];
|
||||
}
|
||||
|
||||
private void flush() {
|
||||
assert currentBlockCardinality <= BLOCK_SIZE;
|
||||
if (currentBlockCardinality <= MAX_ARRAY_LENGTH) {
|
||||
// Use sparse encoding
|
||||
assert denseBuffer == null;
|
||||
if (currentBlockCardinality > 0) {
|
||||
sets[currentBlock] = new ShortArrayDocIdSet(Arrays.copyOf(buffer, currentBlockCardinality));
|
||||
}
|
||||
} else {
|
||||
assert denseBuffer != null;
|
||||
assert denseBuffer.cardinality() == currentBlockCardinality;
|
||||
if (denseBuffer.length() == BLOCK_SIZE && BLOCK_SIZE - currentBlockCardinality < MAX_ARRAY_LENGTH) {
|
||||
// Doc ids are very dense, inverse the encoding
|
||||
final short[] excludedDocs = new short[BLOCK_SIZE - currentBlockCardinality];
|
||||
denseBuffer.flip(0, denseBuffer.length());
|
||||
int excludedDoc = -1;
|
||||
for (int i = 0; i < excludedDocs.length; ++i) {
|
||||
excludedDoc = denseBuffer.nextSetBit(excludedDoc + 1);
|
||||
assert excludedDoc != -1;
|
||||
excludedDocs[i] = (short) excludedDoc;
|
||||
}
|
||||
assert excludedDoc + 1 == denseBuffer.length() || denseBuffer.nextSetBit(excludedDoc + 1) == -1;
|
||||
sets[currentBlock] = new NotDocIdSet(BLOCK_SIZE, new ShortArrayDocIdSet(excludedDocs));
|
||||
} else {
|
||||
// Neither sparse nor super dense, use a fixed bit set
|
||||
sets[currentBlock] = denseBuffer;
|
||||
}
|
||||
denseBuffer = null;
|
||||
}
|
||||
|
||||
cardinality += currentBlockCardinality;
|
||||
denseBuffer = null;
|
||||
currentBlockCardinality = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new doc-id to this builder.
|
||||
* NOTE: doc ids must be added in order.
|
||||
*/
|
||||
public Builder add(int docId) {
|
||||
if (docId <= lastDocId) {
|
||||
throw new IllegalArgumentException("Doc ids must be added in-order, got " + docId + " which is <= lastDocID=" + lastDocId);
|
||||
}
|
||||
final int block = docId >>> 16;
|
||||
if (block != currentBlock) {
|
||||
// we went to a different block, let's flush what we buffered and start from fresh
|
||||
flush();
|
||||
currentBlock = block;
|
||||
}
|
||||
|
||||
if (currentBlockCardinality < MAX_ARRAY_LENGTH) {
|
||||
buffer[currentBlockCardinality] = (short) docId;
|
||||
} else {
|
||||
if (denseBuffer == null) {
|
||||
// the buffer is full, let's move to a fixed bit set
|
||||
final int numBits = Math.min(1 << 16, maxDoc - (block << 16));
|
||||
denseBuffer = new FixedBitSet(numBits);
|
||||
for (short doc : buffer) {
|
||||
denseBuffer.set(doc & 0xFFFF);
|
||||
}
|
||||
}
|
||||
denseBuffer.set(docId & 0xFFFF);
|
||||
}
|
||||
|
||||
lastDocId = docId;
|
||||
currentBlockCardinality += 1;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Add the content of the provided {@link DocIdSetIterator}. */
|
||||
public Builder add(DocIdSetIterator disi) throws IOException {
|
||||
for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
|
||||
add(doc);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Build an instance. */
|
||||
public RoaringDocIdSet build() {
|
||||
flush();
|
||||
return new RoaringDocIdSet(sets, cardinality);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* {@link DocIdSet} implementation that can store documents up to 2^16-1 in a short[].
|
||||
*/
|
||||
private static class ShortArrayDocIdSet extends DocIdSet {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ShortArrayDocIdSet.class);
|
||||
|
||||
private final short[] docIDs;
|
||||
|
||||
private ShortArrayDocIdSet(short[] docIDs) {
|
||||
this.docIDs = docIDs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(docIDs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
return new DocIdSetIterator() {
|
||||
|
||||
int i = -1; // this is the index of the current document in the array
|
||||
int doc = -1;
|
||||
|
||||
private int docId(int i) {
|
||||
return docIDs[i] & 0xFFFF;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (++i >= docIDs.length) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc = docId(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return docIDs.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
// binary search
|
||||
int lo = i + 1;
|
||||
int hi = docIDs.length - 1;
|
||||
while (lo <= hi) {
|
||||
final int mid = (lo + hi) >>> 1;
|
||||
final int midDoc = docId(mid);
|
||||
if (midDoc < target) {
|
||||
lo = mid + 1;
|
||||
} else {
|
||||
hi = mid - 1;
|
||||
}
|
||||
}
|
||||
if (lo == docIDs.length) {
|
||||
i = docIDs.length;
|
||||
return doc = NO_MORE_DOCS;
|
||||
} else {
|
||||
i = lo;
|
||||
return doc = docId(i);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private final DocIdSet[] docIdSets;
|
||||
private final int cardinality;
|
||||
private final long ramBytesUsed;
|
||||
|
||||
private RoaringDocIdSet(DocIdSet[] docIdSets, int cardinality) {
|
||||
this.docIdSets = docIdSets;
|
||||
long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(docIdSets);
|
||||
for (DocIdSet set : this.docIdSets) {
|
||||
if (set != null) {
|
||||
ramBytesUsed += set.ramBytesUsed();
|
||||
}
|
||||
}
|
||||
this.ramBytesUsed = ramBytesUsed;
|
||||
this.cardinality = cardinality;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCacheable() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
if (cardinality == 0) {
|
||||
return null;
|
||||
}
|
||||
return new Iterator();
|
||||
}
|
||||
|
||||
private class Iterator extends DocIdSetIterator {
|
||||
|
||||
int block;
|
||||
DocIdSetIterator sub = null;
|
||||
int doc;
|
||||
|
||||
Iterator() throws IOException {
|
||||
doc = -1;
|
||||
block = 0;
|
||||
while (docIdSets[block] == null) {
|
||||
block += 1;
|
||||
}
|
||||
sub = docIdSets[block].iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (doc == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
final int subNext = sub.nextDoc();
|
||||
if (subNext == NO_MORE_DOCS) {
|
||||
return firstDocFromNextBlock();
|
||||
}
|
||||
return doc = (block << 16) | subNext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
final int targetBlock = target >>> 16;
|
||||
if (targetBlock != block) {
|
||||
block = targetBlock;
|
||||
if (block >= docIdSets.length) {
|
||||
sub = null;
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
if (docIdSets[block] == null) {
|
||||
return firstDocFromNextBlock();
|
||||
}
|
||||
sub = docIdSets[block].iterator();
|
||||
}
|
||||
final int subNext = sub.advance(target & 0xFFFF);
|
||||
if (subNext == NO_MORE_DOCS) {
|
||||
return firstDocFromNextBlock();
|
||||
}
|
||||
return doc = (block << 16) | subNext;
|
||||
}
|
||||
|
||||
private int firstDocFromNextBlock() throws IOException {
|
||||
while (true) {
|
||||
block += 1;
|
||||
if (block >= docIdSets.length) {
|
||||
sub = null;
|
||||
return doc = NO_MORE_DOCS;
|
||||
} else if (docIdSets[block] != null) {
|
||||
sub = docIdSets[block].iterator();
|
||||
final int subNext = sub.nextDoc();
|
||||
assert subNext != NO_MORE_DOCS;
|
||||
return doc = (block << 16) | subNext;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return cardinality;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Return the exact number of documents that are contained in this set. */
|
||||
public int cardinality() {
|
||||
return cardinality;
|
||||
}
|
||||
|
||||
}
|
|
@ -23,9 +23,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SerialMergeScheduler;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
|
@ -35,7 +35,7 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.WAH8DocIdSet;
|
||||
import org.apache.lucene.util.RoaringDocIdSet;
|
||||
|
||||
public class TestCachingWrapperFilter extends LuceneTestCase {
|
||||
Directory dir;
|
||||
|
@ -241,7 +241,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
|
|||
if (originalSet.isCacheable()) {
|
||||
assertEquals("Cached DocIdSet must be of same class like uncached, if cacheable", originalSet.getClass(), cachedSet.getClass());
|
||||
} else {
|
||||
assertTrue("Cached DocIdSet must be an WAH8DocIdSet if the original one was not cacheable", cachedSet instanceof WAH8DocIdSet || cachedSet == null);
|
||||
assertTrue("Cached DocIdSet must be a RoaringDocIdSet if the original one was not cacheable", cachedSet instanceof RoaringDocIdSet || cachedSet == null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
|
||||
public class TestNotDocIdSet extends BaseDocIdSetTestCase<NotDocIdSet> {
|
||||
|
||||
@Override
|
||||
public NotDocIdSet copyOf(BitSet bs, int length) throws IOException {
|
||||
final FixedBitSet set = new FixedBitSet(length);
|
||||
for (int doc = bs.nextClearBit(0); doc < length; doc = bs.nextClearBit(doc + 1)) {
|
||||
set.set(doc);
|
||||
}
|
||||
return new NotDocIdSet(length, set);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void assertEquals(int numBits, BitSet ds1, NotDocIdSet ds2)
|
||||
throws IOException {
|
||||
super.assertEquals(numBits, ds1, ds2);
|
||||
final Bits bits2 = ds2.bits();
|
||||
assertTrue(ds2.isCacheable()); // since we wrapped a FixedBitSet
|
||||
assertNotNull(bits2); // since we wrapped a FixedBitSet
|
||||
assertEquals(numBits, bits2.length());
|
||||
for (int i = 0; i < numBits; ++i) {
|
||||
assertEquals(ds1.get(i), bits2.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
public void testBits() throws IOException {
|
||||
assertNull(new NotDocIdSet(3, DocIdSet.EMPTY).bits());
|
||||
assertNotNull(new NotDocIdSet(3, new FixedBitSet(3)).bits());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
public class TestRoaringDocIdSet extends BaseDocIdSetTestCase<RoaringDocIdSet> {
|
||||
|
||||
@Override
|
||||
public RoaringDocIdSet copyOf(BitSet bs, int length) throws IOException {
|
||||
final RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(length);
|
||||
for (int i = bs.nextSetBit(0); i != -1; i = bs.nextSetBit(i + 1)) {
|
||||
builder.add(i);
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void assertEquals(int numBits, BitSet ds1, RoaringDocIdSet ds2)
|
||||
throws IOException {
|
||||
super.assertEquals(numBits, ds1, ds2);
|
||||
assertEquals(ds1.cardinality(), ds2.cardinality());
|
||||
}
|
||||
|
||||
}
|
|
@ -57,8 +57,8 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTes
|
|||
/** Test length=0. */
|
||||
public void testNoBit() throws IOException {
|
||||
final BitSet bs = new BitSet(1);
|
||||
final T copy = copyOf(bs, TestUtil.nextInt(random(), 1, 10000));
|
||||
assertEquals(0, bs, copy);
|
||||
final T copy = copyOf(bs, 1);
|
||||
assertEquals(1, bs, copy);
|
||||
}
|
||||
|
||||
/** Test length=1. */
|
||||
|
@ -67,7 +67,7 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTes
|
|||
if (random().nextBoolean()) {
|
||||
bs.set(0);
|
||||
}
|
||||
final T copy = copyOf(bs, TestUtil.nextInt(random(), 1, 10000));
|
||||
final T copy = copyOf(bs, 1);
|
||||
assertEquals(1, bs, copy);
|
||||
}
|
||||
|
||||
|
@ -80,7 +80,7 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTes
|
|||
if (random().nextBoolean()) {
|
||||
bs.set(1);
|
||||
}
|
||||
final T copy = copyOf(bs, TestUtil.nextInt(random(), 1, 10000));
|
||||
final T copy = copyOf(bs, 2);
|
||||
assertEquals(2, bs, copy);
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,7 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTes
|
|||
public void testAgainstBitSet() throws IOException {
|
||||
final int numBits = TestUtil.nextInt(random(), 100, 1 << 20);
|
||||
// test various random sets with various load factors
|
||||
for (float percentSet : new float[] {0f, 0.0001f, random().nextFloat() / 2, 0.9f, 1f}) {
|
||||
for (float percentSet : new float[] {0f, 0.0001f, random().nextFloat(), 0.9f, 1f}) {
|
||||
final BitSet set = randomSet(numBits, percentSet);
|
||||
final T copy = copyOf(set, numBits);
|
||||
assertEquals(numBits, set, copy);
|
||||
|
|
Loading…
Reference in New Issue