LUCENE-5440: Add LongBitSet to handle large number of bits; replace usage of OpenBitSet by FixedBitSet/LongBitSet

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1566662 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2014-02-10 15:44:23 +00:00
parent d95750d27d
commit b872f4b5c3
18 changed files with 936 additions and 205 deletions

View File

@ -154,6 +154,9 @@ New Features
are out of bounds, e.g. using a bounding box filter with distance
range faceting. (Mike McCandless)
* LUCENE-5440: Add LongBitSet for managing more than 2.1B bits (otherwise use
FixedBitSet). (Shai Erera)
Build
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;

View File

@ -27,8 +27,8 @@ import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
@ -38,9 +38,9 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.Builder;
@ -54,6 +54,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
private final TreeMap<String,Long> fields;
private final IndexInput in;
private final FieldInfos fieldInfos;
private final int maxDoc;
final static BytesRef END = SimpleTextFieldsWriter.END;
final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
@ -66,6 +67,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
this.maxDoc = state.segmentInfo.getDocCount();
fieldInfos = state.fieldInfos;
in = state.directory.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
boolean success = false;
@ -492,6 +494,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
private class SimpleTextTerms extends Terms {
private final long termsStart;
private final FieldInfo fieldInfo;
private final int maxDoc;
private long sumTotalTermFreq;
private long sumDocFreq;
private int docCount;
@ -500,7 +503,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
private final BytesRef scratch = new BytesRef(10);
private final CharsRef scratchUTF16 = new CharsRef(10);
public SimpleTextTerms(String field, long termsStart) throws IOException {
public SimpleTextTerms(String field, long termsStart, int maxDoc) throws IOException {
this.maxDoc = maxDoc;
this.termsStart = termsStart;
fieldInfo = fieldInfos.fieldInfo(field);
loadTerms();
@ -519,7 +523,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
long lastDocsStart = -1;
int docFreq = 0;
long totalTermFreq = 0;
OpenBitSet visitedDocs = new OpenBitSet();
FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
final IntsRef scratchIntsRef = new IntsRef();
while(true) {
SimpleTextUtil.readLine(in, scratch);
@ -639,7 +643,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
if (fp == null) {
return null;
} else {
terms = new SimpleTextTerms(field, fp);
terms = new SimpleTextTerms(field, fp, maxDoc);
termsCache.put(field, (SimpleTextTerms) terms);
}
}

View File

@ -37,7 +37,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.LongBitSet;
/**
* Abstract API that consumes numeric, binary and
@ -285,7 +285,7 @@ public abstract class DocValuesConsumer implements Closeable {
if (liveDocs == null) {
liveTerms[sub] = dv.termsEnum();
} else {
OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
LongBitSet bitset = new LongBitSet(dv.getValueCount());
for (int i = 0; i < reader.maxDoc(); i++) {
if (liveDocs.get(i)) {
int ord = dv.getOrd(i);
@ -420,7 +420,7 @@ public abstract class DocValuesConsumer implements Closeable {
if (liveDocs == null) {
liveTerms[sub] = dv.termsEnum();
} else {
OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
LongBitSet bitset = new LongBitSet(dv.getValueCount());
for (int i = 0; i < reader.maxDoc(); i++) {
if (liveDocs.get(i)) {
dv.setDocument(i);
@ -625,9 +625,9 @@ public abstract class DocValuesConsumer implements Closeable {
// TODO: seek-by-ord to nextSetBit
static class BitsFilteredTermsEnum extends FilteredTermsEnum {
final OpenBitSet liveTerms;
final LongBitSet liveTerms;
BitsFilteredTermsEnum(TermsEnum in, OpenBitSet liveTerms) {
BitsFilteredTermsEnum(TermsEnum in, LongBitSet liveTerms) {
super(in, false); // <-- not passing false here wasted about 3 hours of my time!!!!!!!!!!!!!
assert liveTerms != null;
this.liveTerms = liveTerms;

View File

@ -27,7 +27,7 @@ import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
@ -48,7 +48,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
private final Counter iwBytesUsed;
private final AppendingDeltaPackedLongBuffer lengths;
private final OpenBitSet docsWithField;
private FixedBitSet docsWithField;
private final FieldInfo fieldInfo;
private int addedValues;
private long bytesUsed;
@ -59,7 +59,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
this.bytesOut = bytes.getDataOutput();
this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
this.iwBytesUsed = iwBytesUsed;
this.docsWithField = new OpenBitSet();
this.docsWithField = new FixedBitSet(64);
this.bytesUsed = docsWithFieldBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@ -88,6 +88,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
// Should never happen!
throw new RuntimeException(ioe);
}
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
updateBytesUsed();
}

View File

@ -42,7 +42,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CommandLineUtil;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.StringHelper;
/**
@ -1392,7 +1392,7 @@ public class CheckIndex {
private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField) {
final long maxOrd = dv.getValueCount()-1;
OpenBitSet seenOrds = new OpenBitSet(dv.getValueCount());
LongBitSet seenOrds = new LongBitSet(dv.getValueCount());
long maxOrd2 = -1;
for (int i = 0; i < reader.maxDoc(); i++) {
dv.setDocument(i);

View File

@ -23,7 +23,7 @@ import java.util.NoSuchElementException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
@ -37,18 +37,16 @@ class NumericDocValuesWriter extends DocValuesWriter {
private AppendingDeltaPackedLongBuffer pending;
private final Counter iwBytesUsed;
private long bytesUsed;
private final OpenBitSet docsWithField;
private FixedBitSet docsWithField;
private final FieldInfo fieldInfo;
private final boolean trackDocsWithField;
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, boolean trackDocsWithField) {
pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
docsWithField = new OpenBitSet();
docsWithField = trackDocsWithField ? new FixedBitSet(64) : null;
bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
iwBytesUsed.addAndGet(bytesUsed);
this.trackDocsWithField = trackDocsWithField;
}
public void addValue(int docID, long value) {
@ -62,7 +60,8 @@ class NumericDocValuesWriter extends DocValuesWriter {
}
pending.add(value);
if (trackDocsWithField) {
if (docsWithField != null) {
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
}
@ -71,7 +70,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
private long docsWithFieldBytesUsed() {
// size of the long[] + some overhead
return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
return docsWithField == null ? 0 : RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
}
private void updateBytesUsed() {
@ -126,13 +125,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
Long value;
if (upto < size) {
long v = iter.next();
if (!trackDocsWithField || docsWithField.get(upto)) {
if (docsWithField == null || docsWithField.get(upto)) {
value = v;
} else {
value = null;
}
} else {
value = trackDocsWithField ? null : MISSING;
value = docsWithField != null ? null : MISSING;
}
upto++;
return value;

View File

@ -2,10 +2,8 @@ package org.apache.lucene.index;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedGrowableWriter;
import org.apache.lucene.util.packed.PagedMutable;
@ -98,11 +96,7 @@ interface NumericFieldUpdates {
if (docs.size() == size) {
docs = docs.grow(size + 1);
values = values.grow(size + 1);
int numWords = (int) (docs.size() >> 6);
if (docsWithField.getBits().length <= numWords) {
numWords = ArrayUtil.oversize(numWords + 1, RamUsageEstimator.NUM_BYTES_LONG);
docsWithField = new FixedBitSet(docsWithField, numWords << 6);
}
docsWithField = FixedBitSet.ensureCapacity(docsWithField, (int) docs.size());
}
if (value != NumericUpdate.MISSING) {
@ -208,11 +202,7 @@ interface NumericFieldUpdates {
}
docs = docs.grow(size + packedOther.size);
values = values.grow(size + packedOther.size);
int numWords = (int) (docs.size() >> 6);
if (docsWithField.getBits().length <= numWords) {
numWords = ArrayUtil.oversize(numWords + 1, RamUsageEstimator.NUM_BYTES_LONG);
docsWithField = new FixedBitSet(docsWithField, numWords << 6);
}
docsWithField = FixedBitSet.ensureCapacity(docsWithField, (int) docs.size());
for (int i = 0; i < packedOther.size; i++) {
int doc = (int) packedOther.docs.get(i);
if (packedOther.docsWithField.get(i)) {

View File

@ -25,7 +25,7 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.LongBitSet;
/**
* Rewrites MultiTermQueries into a filter, using DocTermOrds for term enumeration.
@ -85,7 +85,7 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
final SortedSetDocValues docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), query.field);
// Cannot use FixedBitSet because we require long index (ord):
final OpenBitSet termSet = new OpenBitSet(docTermOrds.getValueCount());
final LongBitSet termSet = new LongBitSet(docTermOrds.getValueCount());
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
@Override
@ -136,7 +136,7 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
assert termsEnum != null;
if (termsEnum.next() != null) {
// fill into a OpenBitSet
// fill into a bitset
do {
termSet.set(termsEnum.ord());
} while (termsEnum.next() != null);

View File

@ -25,7 +25,7 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.LongBitSet;
/**
* Rewrites MultiTermQueries into a filter, using the FieldCache for term enumeration.
@ -85,7 +85,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), query.field);
// Cannot use FixedBitSet because we require long index (ord):
final OpenBitSet termSet = new OpenBitSet(fcsi.getValueCount());
final LongBitSet termSet = new LongBitSet(fcsi.getValueCount());
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
@Override
@ -136,7 +136,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
assert termsEnum != null;
if (termsEnum.next() != null) {
// fill into a OpenBitSet
// fill into a bitset
do {
long ord = termsEnum.ord();
if (ord >= 0) {

View File

@ -27,7 +27,7 @@ import java.util.LinkedHashMap;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.FixedBitSet;
final class SloppyPhraseScorer extends Scorer {
private PhrasePositions min, max;
@ -152,7 +152,7 @@ final class SloppyPhraseScorer extends Scorer {
return true; // not a repeater
}
PhrasePositions[] rg = rptGroups[pp.rptGroup];
OpenBitSet bits = new OpenBitSet(rg.length); // for re-queuing after collisions are resolved
FixedBitSet bits = new FixedBitSet(rg.length); // for re-queuing after collisions are resolved
int k0 = pp.rptInd;
int k;
while((k=collide(pp)) >= 0) {
@ -167,6 +167,9 @@ final class SloppyPhraseScorer extends Scorer {
// collisions resolved, now re-queue
// empty (partially) the queue until seeing all pps advanced for resolving collisions
int n = 0;
// TODO can't this be checked once and decremented as we clear bits?
// in fact, we don't even need to clear any bits, since the bitset is totally discarded
// only need to pop as many set bits from the pq.
while (bits.cardinality() > 0) {
PhrasePositions pp2 = pq.pop();
rptStack[n++] = pp2;
@ -405,7 +408,7 @@ final class SloppyPhraseScorer extends Scorer {
} else {
// more involved - has multi-terms
ArrayList<HashSet<PhrasePositions>> tmp = new ArrayList<HashSet<PhrasePositions>>();
ArrayList<OpenBitSet> bb = ppTermsBitSets(rpp, rptTerms);
ArrayList<FixedBitSet> bb = ppTermsBitSets(rpp, rptTerms);
unionTermGroups(bb);
HashMap<Term,Integer> tg = termGroups(rptTerms, bb);
HashSet<Integer> distinctGroupIDs = new HashSet<Integer>(tg.values());
@ -467,10 +470,10 @@ final class SloppyPhraseScorer extends Scorer {
}
/** bit-sets - for each repeating pp, for each of its repeating terms, the term ordinal values is set */
private ArrayList<OpenBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
ArrayList<OpenBitSet> bb = new ArrayList<OpenBitSet>(rpp.length);
private ArrayList<FixedBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
ArrayList<FixedBitSet> bb = new ArrayList<FixedBitSet>(rpp.length);
for (PhrasePositions pp : rpp) {
OpenBitSet b = new OpenBitSet(tord.size());
FixedBitSet b = new FixedBitSet(tord.size());
Integer ord;
for (Term t: pp.terms) {
if ((ord=tord.get(t))!=null) {
@ -483,14 +486,14 @@ final class SloppyPhraseScorer extends Scorer {
}
/** union (term group) bit-sets until they are disjoint (O(n^^2)), and each group have different terms */
private void unionTermGroups(ArrayList<OpenBitSet> bb) {
private void unionTermGroups(ArrayList<FixedBitSet> bb) {
int incr;
for (int i=0; i<bb.size()-1; i+=incr) {
incr = 1;
int j = i+1;
while (j<bb.size()) {
if (bb.get(i).intersects(bb.get(j))) {
bb.get(i).union(bb.get(j));
bb.get(i).or(bb.get(j));
bb.remove(j);
incr = 0;
} else {
@ -501,7 +504,7 @@ final class SloppyPhraseScorer extends Scorer {
}
/** map each term to the single group that contains it */
private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<OpenBitSet> bb) throws IOException {
private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<FixedBitSet> bb) throws IOException {
HashMap<Term,Integer> tg = new HashMap<Term,Integer>();
Term[] t = tord.keySet().toArray(new Term[0]);
for (int i=0; i<bb.size(); i++) { // i is the group no.

View File

@ -23,23 +23,114 @@ import java.util.Arrays;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
// TODO: maybe merge with BitVector? Problem is BitVector
// caches its cardinality...
/** BitSet of fixed length (numBits), backed by accessible
* ({@link #getBits}) long[], accessed with an int index,
* implementing Bits and DocIdSet. Unlike {@link
* OpenBitSet} this bit set does not auto-expand, cannot
* handle long index, and does not have fastXX/XX variants
* (just X).
*
* @lucene.internal
**/
/**
* BitSet of fixed length (numBits), backed by accessible ({@link #getBits})
* long[], accessed with an int index, implementing {@link Bits} and
* {@link DocIdSet}. If you need to manage more than 2.1B bits, use
* {@link LongBitSet}.
*/
public final class FixedBitSet extends DocIdSet implements Bits {
/**
* A {@link DocIdSetIterator} which iterates over set bits in a
* {@link FixedBitSet}.
*/
public static final class FixedBitSetIterator extends DocIdSetIterator {
// Total number of logical bits, and the number of backing 64-bit words to scan.
final int numBits, numWords;
// Backing words; bit i lives in bits[i >> 6] at position (i & 0x3f).
final long[] bits;
// Current doc id; -1 before the first call to nextDoc()/advance().
int doc = -1;
/** Creates an iterator over the given bits array; {@code wordLength} is the number of words to scan. */
public FixedBitSetIterator(long[] bits, int numBits, int wordLength) {
this.bits = bits;
this.numBits = numBits;
this.numWords = wordLength;
}
@Override
public int nextDoc() throws IOException {
// Once exhausted, or when stepping past the last bit, stay exhausted.
if (doc == NO_MORE_DOCS || ++doc >= numBits) {
return doc = NO_MORE_DOCS;
}
int i = doc >> 6;
final int subIndex = doc & 0x3f; // index within the word
long word = bits[i] >> subIndex; // skip all the bits to the right of index
if (word != 0) {
return doc = doc + Long.numberOfTrailingZeros(word);
}
// No set bit at/after doc in the current word: scan the remaining words.
while (++i < numWords) {
word = bits[i];
if (word != 0) {
return doc = (i << 6) + Long.numberOfTrailingZeros(word);
}
}
return doc = NO_MORE_DOCS;
}
@Override
public int docID() {
return doc;
}
@Override
public long cost() {
// Upper bound on the number of docs this iterator can return.
return numBits;
}
@Override
public int advance(int target) throws IOException {
if (doc == NO_MORE_DOCS || target >= numBits) {
return doc = NO_MORE_DOCS;
}
int i = target >> 6;
final int subIndex = target & 0x3f; // index within the word
long word = bits[i] >> subIndex; // skip all the bits to the right of index
if (word != 0) {
return doc = target + Long.numberOfTrailingZeros(word);
}
// Scan forward word by word for the next set bit at or after target.
while (++i < numWords) {
word = bits[i];
if (word != 0) {
return doc = (i << 6) + Long.numberOfTrailingZeros(word);
}
}
return doc = NO_MORE_DOCS;
}
}
private final long[] bits;
private final int numBits;
private final int wordLength;
/**
* If the given {@link FixedBitSet} is large enough to hold {@code numBits},
* returns the given bits, otherwise returns a new {@link FixedBitSet} which
* can hold the requested number of bits.
*
* <p>
* <b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of
* the given {@code bits} if possible. Also, calling {@link #length()} on the
* returned bits may return a value greater than {@code numBits}.
*/
public static FixedBitSet ensureCapacity(FixedBitSet bits, int numBits) {
// Large enough already (indices 0..numBits-1 fit): return the same instance.
if (numBits < bits.length()) {
return bits;
} else {
int numWords = bits2words(numBits);
long[] arr = bits.getBits();
if (numWords >= arr.length) {
// Oversize by at least one word so repeated growth is amortized.
arr = ArrayUtil.grow(arr, numWords + 1);
}
// NOTE(review): arr.length << 6 is int arithmetic; presumably arr.length
// stays below 2^25 here, but for numBits near Integer.MAX_VALUE this could
// wrap negative after oversizing -- TODO confirm.
return new FixedBitSet(arr, arr.length << 6);
}
}
/** returns the number of 64 bit words it would take to hold numBits */
public static int bits2words(int numBits) {
int numLong = numBits >>> 6;
@ -64,82 +155,9 @@ public final class FixedBitSet extends DocIdSet implements Bits {
this.bits = storedBits;
}
/**
* Makes a full copy of the bits, while allowing to expand/shrink the bitset.
* If {@code numBits &lt; other.numBits}, then only the first {@code numBits}
* are copied from other.
*/
public FixedBitSet(FixedBitSet other, int numBits) {
wordLength = bits2words(numBits);
bits = new long[wordLength];
System.arraycopy(other.bits, 0, bits, 0, Math.min(other.wordLength, wordLength));
this.numBits = numBits;
}
@Override
public DocIdSetIterator iterator() {
// define locally so we don't have "enclosing acces" issue
final long[] bits = this.bits;
final int wordLength = this.wordLength;
final int numBits = this.numBits;
return new DocIdSetIterator() {
int doc = -1;
@Override
public int nextDoc() throws IOException {
if (doc == NO_MORE_DOCS || ++doc >= numBits) {
return doc = NO_MORE_DOCS;
}
int i = doc >> 6;
final int subIndex = doc & 0x3f; // index within the word
long word = bits[i] >> subIndex; // skip all the bits to the right of index
if (word != 0) {
return doc = doc + Long.numberOfTrailingZeros(word);
}
while (++i < wordLength) {
word = bits[i];
if (word != 0) {
return doc = (i << 6) + Long.numberOfTrailingZeros(word);
}
}
return doc = NO_MORE_DOCS;
}
@Override
public int docID() {
return doc;
}
@Override
public long cost() {
return bits.length;
}
@Override
public int advance(int target) throws IOException {
if (doc == NO_MORE_DOCS || target >= numBits) {
return doc = NO_MORE_DOCS;
}
int i = target >> 6;
final int subIndex = target & 0x3f; // index within the word
long word = bits[i] >> subIndex; // skip all the bits to the right of index
if (word != 0) {
return doc = target + Long.numberOfTrailingZeros(word);
}
while (++i < wordLength) {
word = bits[i];
if (word != 0) {
return doc = (i << 6) + Long.numberOfTrailingZeros(word);
}
}
return doc = NO_MORE_DOCS;
}
};
return new FixedBitSetIterator(bits, numBits, wordLength);
}
@Override
@ -272,6 +290,12 @@ public final class FixedBitSet extends DocIdSet implements Bits {
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
obs.advance(numBits);
} else if (iter instanceof FixedBitSetIterator && iter.docID() == -1) {
final FixedBitSetIterator fbs = (FixedBitSetIterator) iter;
or(fbs.bits, fbs.numWords);
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
fbs.advance(numBits);
} else {
int doc;
while ((doc = iter.nextDoc()) < numBits) {
@ -292,6 +316,24 @@ public final class FixedBitSet extends DocIdSet implements Bits {
thisArr[pos] |= otherArr[pos];
}
}
/** Performs this = this XOR other over the words the two sets share. */
public void xor(FixedBitSet other) {
final long[] dst = this.bits;
final long[] src = other.bits;
final int sharedWords = Math.min(wordLength, other.wordLength);
for (int w = 0; w < sharedWords; w++) {
dst[w] ^= src[w];
}
}
/** Does in-place XOR of the bits provided by the iterator. */
public void xor(DocIdSetIterator iter) throws IOException {
int doc;
// Toggle each doc the iterator produces; flip(doc, doc + 1) flips the single bit at doc.
while ((doc = iter.nextDoc()) < numBits) {
flip(doc, doc + 1);
}
}
/** Does in-place AND of the bits provided by the
* iterator. */
@ -302,6 +344,12 @@ public final class FixedBitSet extends DocIdSet implements Bits {
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
obs.advance(numBits);
} else if (iter instanceof FixedBitSetIterator && iter.docID() == -1) {
final FixedBitSetIterator fbs = (FixedBitSetIterator) iter;
and(fbs.bits, fbs.numWords);
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
fbs.advance(numBits);
} else {
if (numBits == 0) return;
int disiDoc, bitSetDoc = nextSetBit(0);
@ -316,6 +364,15 @@ public final class FixedBitSet extends DocIdSet implements Bits {
}
}
/** Returns true if this set and {@code other} share at least one set bit. */
public boolean intersects(FixedBitSet other) {
final int overlap = Math.min(wordLength, other.wordLength);
for (int w = overlap - 1; w >= 0; w--) {
if ((bits[w] & other.bits[w]) != 0L) {
return true;
}
}
return false;
}
/** this = this AND other */
public void and(FixedBitSet other) {
and(other.bits, other.wordLength);
@ -341,6 +398,12 @@ public final class FixedBitSet extends DocIdSet implements Bits {
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
obs.advance(numBits);
} else if (iter instanceof FixedBitSetIterator && iter.docID() == -1) {
final FixedBitSetIterator fbs = (FixedBitSetIterator) iter;
andNot(fbs.bits, fbs.numWords);
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
fbs.advance(numBits);
} else {
int doc;
while ((doc = iter.nextDoc()) < numBits) {
@ -467,7 +530,9 @@ public final class FixedBitSet extends DocIdSet implements Bits {
@Override
public FixedBitSet clone() {
return new FixedBitSet(this, numBits);
long[] bits = new long[this.bits.length];
System.arraycopy(this.bits, 0, bits, 0, bits.length);
return new FixedBitSet(bits, numBits);
}
/** returns true if both sets have the same bits set */

View File

@ -0,0 +1,370 @@
package org.apache.lucene.util;
import java.util.Arrays;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* BitSet of fixed length (numBits), backed by accessible ({@link #getBits})
* long[], accessed with a long index. Use it only if you intend to store more
* than 2.1B bits, otherwise you should use {@link FixedBitSet}.
*/
public final class LongBitSet {
private final long[] bits;
private final long numBits;
private final int numWords;
/**
* If the given {@link LongBitSet} is large enough to hold
* {@code numBits}, returns the given bits, otherwise returns a new
* {@link LongBitSet} which can hold the requested number of bits.
*
* <p>
* <b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of
* the given {@code bits} if possible. Also, calling {@link #length()} on the
* returned bits may return a value greater than {@code numBits}.
*/
public static LongBitSet ensureCapacity(LongBitSet bits, long numBits) {
// Large enough already (indices 0..numBits-1 fit): return the same instance.
if (numBits < bits.length()) {
return bits;
} else {
int numWords = bits2words(numBits);
long[] arr = bits.getBits();
if (numWords >= arr.length) {
// Oversize by at least one word so repeated growth is amortized.
arr = ArrayUtil.grow(arr, numWords + 1);
}
// Cast before shifting: arr.length << 6 is int arithmetic and overflows
// once the array holds 2^25 or more words (>= 2.1B bits) -- exactly the
// sizes this class exists to support.
return new LongBitSet(arr, (long) arr.length << 6);
}
}
/** Returns how many 64-bit words are needed to hold {@code numBits} bits. */
public static int bits2words(long numBits) {
final int fullWords = (int) (numBits >>> 6);
// One extra word when numBits is not an exact multiple of 64.
return (numBits & 63L) == 0L ? fullWords : fullWords + 1;
}
/** Creates a new bitset able to hold {@code numBits} bits; all bits start cleared. */
public LongBitSet(long numBits) {
this.numBits = numBits;
bits = new long[bits2words(numBits)];
numWords = bits.length;
}
/**
 * Wraps an existing {@code long[]} as a bitset of {@code numBits} bits.
 * The array is used directly, not copied.
 */
public LongBitSet(long[] storedBits, long numBits) {
this.numWords = bits2words(numBits);
if (numWords > storedBits.length) {
throw new IllegalArgumentException("The given long array is too small to hold " + numBits + " bits");
}
this.numBits = numBits;
this.bits = storedBits;
}
/** Returns the number of bits stored in this bitset. */
public long length() {
return numBits;
}
/** Expert: returns the backing {@code long[]} directly, not a copy. */
public long[] getBits() {
return bits;
}
/** Returns number of set bits. NOTE: this visits every
 * long in the backing bits array, and the result is not
 * internally cached! */
public long cardinality() {
return BitUtil.pop_array(bits, 0, bits.length);
}
/** Returns true if the bit at {@code index} is set. */
public boolean get(long index) {
assert index >= 0 && index < numBits: "index=" + index;
int i = (int) (index >> 6); // div 64
// signed shift will keep a negative index and force an
// array-index-out-of-bounds-exception, removing the need for an explicit check.
int bit = (int) (index & 0x3f); // mod 64
long bitmask = 1L << bit;
return (bits[i] & bitmask) != 0;
}
/** Sets the bit at {@code index}. */
public void set(long index) {
assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits;
int wordNum = (int) (index >> 6); // div 64
int bit = (int) (index & 0x3f); // mod 64
long bitmask = 1L << bit;
bits[wordNum] |= bitmask;
}
/** Sets the bit at {@code index} and returns its previous value. */
public boolean getAndSet(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int) (index >> 6); // div 64
int bit = (int) (index & 0x3f); // mod 64
long bitmask = 1L << bit;
boolean val = (bits[wordNum] & bitmask) != 0;
bits[wordNum] |= bitmask;
return val;
}
/** Clears the bit at {@code index}. */
public void clear(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int) (index >> 6);
int bit = (int) (index & 0x03f);
long bitmask = 1L << bit;
bits[wordNum] &= ~bitmask;
}
/** Clears the bit at {@code index} and returns its previous value. */
public boolean getAndClear(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int) (index >> 6); // div 64
int bit = (int) (index & 0x3f); // mod 64
long bitmask = 1L << bit;
boolean val = (bits[wordNum] & bitmask) != 0;
bits[wordNum] &= ~bitmask;
return val;
}
/** Returns the index of the first set bit starting at the index specified.
 * -1 is returned if there are no more set bits.
 */
public long nextSetBit(long index) {
assert index >= 0 && index < numBits;
int i = (int) (index >> 6);
final int subIndex = (int) (index & 0x3f); // index within the word
long word = bits[i] >> subIndex; // skip all the bits to the right of index
if (word != 0) {
return index + Long.numberOfTrailingZeros(word);
}
while (++i < numWords) {
word = bits[i];
if (word != 0) {
// Cast before shifting: i << 6 is int arithmetic and overflows once
// i >= 2^25, i.e. for bitsets larger than 2.1B bits -- the very sizes
// this class is meant for.
return ((long) i << 6) + Long.numberOfTrailingZeros(word);
}
}
return -1;
}
/** Returns the index of the last set bit before or on the index specified.
 * -1 is returned if there are no more set bits.
 */
public long prevSetBit(long index) {
assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits;
int i = (int) (index >> 6);
final int subIndex = (int) (index & 0x3f); // index within the word
long word = (bits[i] << (63-subIndex)); // skip all the bits to the left of index
if (word != 0) {
// Cast before shifting: i << 6 is int arithmetic and overflows once
// i >= 2^25, i.e. for bitsets larger than 2.1B bits.
return ((long) i << 6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197
}
while (--i >= 0) {
word = bits[i];
if (word !=0 ) {
return ((long) i << 6) + 63 - Long.numberOfLeadingZeros(word);
}
}
return -1;
}
/** Performs this = this OR other over the words the two sets share. */
public void or(LongBitSet other) {
final int sharedWords = Math.min(numWords, other.numWords);
for (int w = 0; w < sharedWords; w++) {
bits[w] |= other.bits[w];
}
}
/** Performs this = this XOR other over the words the two sets share. */
public void xor(LongBitSet other) {
final int sharedWords = Math.min(numWords, other.numWords);
for (int w = 0; w < sharedWords; w++) {
bits[w] ^= other.bits[w];
}
}
/** Returns true if this set and {@code other} share at least one set bit. */
public boolean intersects(LongBitSet other) {
final int overlap = Math.min(numWords, other.numWords);
for (int w = overlap - 1; w >= 0; w--) {
if ((bits[w] & other.bits[w]) != 0L) {
return true;
}
}
return false;
}
/** this = this AND other */
public void and(LongBitSet other) {
int pos = Math.min(numWords, other.numWords);
while (--pos >= 0) {
bits[pos] &= other.bits[pos];
}
// Words beyond other's logical length AND against implicit zeros: clear them.
if (numWords > other.numWords) {
Arrays.fill(bits, other.numWords, numWords, 0L);
}
}
/** this = this AND NOT other */
public void andNot(LongBitSet other) {
// Use other.numWords (not other.bits.length) for consistency with
// and/or/xor above: words past other's logical word count must be treated
// as zeros even when its backing array is longer (e.g. after
// ensureCapacity reuses an oversized array).
int pos = Math.min(numWords, other.numWords);
while (--pos >= 0) {
bits[pos] &= ~other.bits[pos];
}
}
// NOTE: no .isEmpty() here because that's trappy (ie,
// typically isEmpty is low cost, but this one wouldn't
// be)
  /** Flips a range of bits
   *
   * @param startIndex lower index
   * @param endIndex one-past the last bit to flip
   */
  public void flip(long startIndex, long endIndex) {
    assert startIndex >= 0 && startIndex < numBits;
    assert endIndex >= 0 && endIndex <= numBits;
    if (endIndex <= startIndex) {
      // empty range: nothing to flip
      return;
    }

    int startWord = (int) (startIndex >> 6);
    int endWord = (int) ((endIndex-1) >> 6);  // word holding the last bit actually flipped

    /*** Grrr, java shifting wraps around so -1L>>>64 == -1
     * for that reason, make sure not to use endmask if the bits to flip will
     * be zero in the last word (redefine endWord to be the last changed...)
    long startmask = -1L << (startIndex & 0x3f);  // example: 11111...111000
    long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
    ***/
    // Java shift counts are taken mod 64, so the explicit (x & 0x3f) is implicit:
    long startmask = -1L << startIndex;
    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap

    if (startWord == endWord) {
      // range lies within one word: flip only where the two masks overlap
      bits[startWord] ^= (startmask & endmask);
      return;
    }

    bits[startWord] ^= startmask;

    // whole words strictly inside the range are simply inverted
    for (int i=startWord+1; i<endWord; i++) {
      bits[i] = ~bits[i];
    }

    bits[endWord] ^= endmask;
  }
  /** Sets a range of bits
   *
   * @param startIndex lower index
   * @param endIndex one-past the last bit to set
   */
  public void set(long startIndex, long endIndex) {
    assert startIndex >= 0 && startIndex < numBits;
    assert endIndex >= 0 && endIndex <= numBits;
    if (endIndex <= startIndex) {
      // empty range: nothing to set
      return;
    }

    int startWord = (int) (startIndex >> 6);
    int endWord = (int) ((endIndex-1) >> 6);  // word holding the last bit actually set

    // Java shift counts wrap mod 64, so no explicit (x & 0x3f) is needed
    long startmask = -1L << startIndex;
    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap

    if (startWord == endWord) {
      // range lies within one word: set only where the two masks overlap
      bits[startWord] |= (startmask & endmask);
      return;
    }

    bits[startWord] |= startmask;
    // whole words strictly inside the range become all-ones
    Arrays.fill(bits, startWord+1, endWord, -1L);
    bits[endWord] |= endmask;
  }
  /** Clears a range of bits.
   *
   * @param startIndex lower index
   * @param endIndex one-past the last bit to clear
   */
  public void clear(long startIndex, long endIndex) {
    assert startIndex >= 0 && startIndex < numBits;
    assert endIndex >= 0 && endIndex <= numBits;
    if (endIndex <= startIndex) {
      // empty range: nothing to clear
      return;
    }

    int startWord = (int) (startIndex >> 6);
    int endWord = (int) ((endIndex-1) >> 6);  // word holding the last bit actually cleared

    // Java shift counts wrap mod 64, so no explicit (x & 0x3f) is needed
    long startmask = -1L << startIndex;
    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap

    // invert masks since we are clearing: the inverted mask keeps bits outside the range
    startmask = ~startmask;
    endmask = ~endmask;

    if (startWord == endWord) {
      // range lies within one word: keep bits covered by either inverted mask
      bits[startWord] &= (startmask | endmask);
      return;
    }

    bits[startWord] &= startmask;
    // whole words strictly inside the range become all-zeros
    Arrays.fill(bits, startWord+1, endWord, 0L);
    bits[endWord] &= endmask;
  }
@Override
public LongBitSet clone() {
long[] bits = new long[this.bits.length];
System.arraycopy(this.bits, 0, bits, 0, bits.length);
return new LongBitSet(bits, numBits);
}
/** returns true if both sets have the same bits set */
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof LongBitSet)) {
return false;
}
LongBitSet other = (LongBitSet) o;
if (numBits != other.length()) {
return false;
}
return Arrays.equals(bits, other.bits);
}
@Override
public int hashCode() {
long h = 0;
for (int i = numWords; --i>=0;) {
h ^= bits[i];
h = (h << 1) | (h >>> 63); // rotate left
}
// fold leftmost bits into right and add a constant to prevent
// empty sets from returning 0, which is too common.
return (int) ((h>>32) ^ h) + 0x98761234;
}
}

View File

@ -175,10 +175,12 @@ public class TestFixedBitSet extends BaseDocIdSetTestCase<FixedBitSet> {
BitSet a_and = (BitSet)a.clone(); a_and.and(a0);
BitSet a_or = (BitSet)a.clone(); a_or.or(a0);
BitSet a_xor = (BitSet)a.clone(); a_xor.xor(a0);
BitSet a_andn = (BitSet)a.clone(); a_andn.andNot(a0);
FixedBitSet b_and = b.clone(); assertEquals(b,b_and); b_and.and(b0);
FixedBitSet b_or = b.clone(); b_or.or(b0);
FixedBitSet b_xor = b.clone(); b_xor.xor(b0);
FixedBitSet b_andn = b.clone(); b_andn.andNot(b0);
assertEquals(a0.cardinality(), b0.cardinality());
@ -187,9 +189,11 @@ public class TestFixedBitSet extends BaseDocIdSetTestCase<FixedBitSet> {
doIterate(a_and,b_and, mode);
doIterate(a_or,b_or, mode);
doIterate(a_andn,b_andn, mode);
doIterate(a_xor,b_xor, mode);
assertEquals(a_and.cardinality(), b_and.cardinality());
assertEquals(a_or.cardinality(), b_or.cardinality());
assertEquals(a_xor.cardinality(), b_xor.cardinality());
assertEquals(a_andn.cardinality(), b_andn.cardinality());
}
@ -329,49 +333,31 @@ public class TestFixedBitSet extends BaseDocIdSetTestCase<FixedBitSet> {
checkNextSetBitArray(new int[0], setBits.length + random().nextInt(10));
}
public void testGrow() {
public void testEnsureCapacity() {
FixedBitSet bits = new FixedBitSet(5);
bits.set(1);
bits.set(4);
FixedBitSet newBits = new FixedBitSet(bits, 8); // grow within the word
FixedBitSet newBits = FixedBitSet.ensureCapacity(bits, 8); // grow within the word
assertTrue(newBits.get(1));
assertTrue(newBits.get(4));
newBits.clear(1);
// we align to 64-bits, so even though it shouldn't have, it re-allocated a long[1]
assertTrue(bits.get(1));
assertFalse(newBits.get(1));
newBits = new FixedBitSet(bits, 72); // grow beyond one word
newBits.set(1);
newBits = FixedBitSet.ensureCapacity(newBits, newBits.length() - 2); // reuse
assertTrue(newBits.get(1));
assertTrue(newBits.get(4));
}
public void testShrink() {
FixedBitSet bits = new FixedBitSet(72);
bits.set(1);
bits.set(4);
bits.set(69);
FixedBitSet newBits = new FixedBitSet(bits, 66); // shrink within the word
newBits = FixedBitSet.ensureCapacity(bits, 72); // grow beyond one word
assertTrue(newBits.get(1));
assertTrue(newBits.get(4));
boolean hitError = true;
try {
newBits.get(69);
hitError = false;
} catch (AssertionError e) {
hitError = true;
}
assertTrue(hitError);
newBits = new FixedBitSet(bits, 8); // shrink beyond one word
assertTrue(newBits.get(1));
assertTrue(newBits.get(4));
hitError = true;
try {
newBits.get(69);
hitError = false;
} catch (AssertionError e) {
hitError = true;
}
assertTrue(hitError);
newBits.clear(1);
// we grew the long[], so it's not shared
assertTrue(bits.get(1));
assertFalse(newBits.get(1));
}
}

View File

@ -0,0 +1,320 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.io.IOException;
import java.util.BitSet;
public class TestLongBitSet extends LuceneTestCase {
void doGet(BitSet a, LongBitSet b) {
long max = b.length();
for (int i=0; i<max; i++) {
if (a.get(i) != b.get(i)) {
fail("mismatch: BitSet=["+i+"]="+a.get(i));
}
}
}
void doNextSetBit(BitSet a, LongBitSet b) {
int aa=-1;
long bb=-1;
do {
aa = a.nextSetBit(aa+1);
bb = bb < b.length()-1 ? b.nextSetBit(bb+1) : -1;
assertEquals(aa,bb);
} while (aa>=0);
}
void doPrevSetBit(BitSet a, LongBitSet b) {
int aa = a.size() + random().nextInt(100);
long bb = aa;
do {
// aa = a.prevSetBit(aa-1);
aa--;
while ((aa >= 0) && (! a.get(aa))) {
aa--;
}
if (b.length() == 0) {
bb = -1;
} else if (bb > b.length()-1) {
bb = b.prevSetBit(b.length()-1);
} else if (bb < 1) {
bb = -1;
} else {
bb = bb >= 1 ? b.prevSetBit(bb-1) : -1;
}
assertEquals(aa,bb);
} while (aa>=0);
}
void doRandomSets(int maxSize, int iter, int mode) throws IOException {
BitSet a0=null;
LongBitSet b0=null;
for (int i=0; i<iter; i++) {
int sz = _TestUtil.nextInt(random(), 2, maxSize);
BitSet a = new BitSet(sz);
LongBitSet b = new LongBitSet(sz);
// test the various ways of setting bits
if (sz>0) {
int nOper = random().nextInt(sz);
for (int j=0; j<nOper; j++) {
int idx;
idx = random().nextInt(sz);
a.set(idx);
b.set(idx);
idx = random().nextInt(sz);
a.clear(idx);
b.clear(idx);
idx = random().nextInt(sz);
a.flip(idx);
b.flip(idx, idx+1);
idx = random().nextInt(sz);
a.flip(idx);
b.flip(idx, idx+1);
boolean val2 = b.get(idx);
boolean val = b.getAndSet(idx);
assertTrue(val2 == val);
assertTrue(b.get(idx));
if (!val) b.clear(idx);
assertTrue(b.get(idx) == val);
}
}
// test that the various ways of accessing the bits are equivalent
doGet(a,b);
// test ranges, including possible extension
int fromIndex, toIndex;
fromIndex = random().nextInt(sz/2);
toIndex = fromIndex + random().nextInt(sz - fromIndex);
BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
LongBitSet bb = b.clone(); bb.flip(fromIndex,toIndex);
fromIndex = random().nextInt(sz/2);
toIndex = fromIndex + random().nextInt(sz - fromIndex);
aa = (BitSet)a.clone(); aa.clear(fromIndex,toIndex);
bb = b.clone(); bb.clear(fromIndex,toIndex);
doNextSetBit(aa,bb); // a problem here is from clear() or nextSetBit
doPrevSetBit(aa,bb);
fromIndex = random().nextInt(sz/2);
toIndex = fromIndex + random().nextInt(sz - fromIndex);
aa = (BitSet)a.clone(); aa.set(fromIndex,toIndex);
bb = b.clone(); bb.set(fromIndex,toIndex);
doNextSetBit(aa,bb); // a problem here is from set() or nextSetBit
doPrevSetBit(aa,bb);
if (b0 != null && b0.length() <= b.length()) {
assertEquals(a.cardinality(), b.cardinality());
BitSet a_and = (BitSet)a.clone(); a_and.and(a0);
BitSet a_or = (BitSet)a.clone(); a_or.or(a0);
BitSet a_xor = (BitSet)a.clone(); a_xor.xor(a0);
BitSet a_andn = (BitSet)a.clone(); a_andn.andNot(a0);
LongBitSet b_and = b.clone(); assertEquals(b,b_and); b_and.and(b0);
LongBitSet b_or = b.clone(); b_or.or(b0);
LongBitSet b_xor = b.clone(); b_xor.xor(b0);
LongBitSet b_andn = b.clone(); b_andn.andNot(b0);
assertEquals(a0.cardinality(), b0.cardinality());
assertEquals(a_or.cardinality(), b_or.cardinality());
assertEquals(a_and.cardinality(), b_and.cardinality());
assertEquals(a_or.cardinality(), b_or.cardinality());
assertEquals(a_xor.cardinality(), b_xor.cardinality());
assertEquals(a_andn.cardinality(), b_andn.cardinality());
}
a0=a;
b0=b;
}
}
// large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
// larger testsuite.
public void testSmall() throws IOException {
doRandomSets(atLeast(1200), atLeast(1000), 1);
doRandomSets(atLeast(1200), atLeast(1000), 2);
}
// uncomment to run a bigger test (~2 minutes).
/*
public void testBig() {
doRandomSets(2000,200000, 1);
doRandomSets(2000,200000, 2);
}
*/
public void testEquals() {
// This test can't handle numBits==0:
final int numBits = random().nextInt(2000) + 1;
LongBitSet b1 = new LongBitSet(numBits);
LongBitSet b2 = new LongBitSet(numBits);
assertTrue(b1.equals(b2));
assertTrue(b2.equals(b1));
for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
int idx = random().nextInt(numBits);
if (!b1.get(idx)) {
b1.set(idx);
assertFalse(b1.equals(b2));
assertFalse(b2.equals(b1));
b2.set(idx);
assertTrue(b1.equals(b2));
assertTrue(b2.equals(b1));
}
}
// try different type of object
assertFalse(b1.equals(new Object()));
}
public void testHashCodeEquals() {
// This test can't handle numBits==0:
final int numBits = random().nextInt(2000) + 1;
LongBitSet b1 = new LongBitSet(numBits);
LongBitSet b2 = new LongBitSet(numBits);
assertTrue(b1.equals(b2));
assertTrue(b2.equals(b1));
for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
int idx = random().nextInt(numBits);
if (!b1.get(idx)) {
b1.set(idx);
assertFalse(b1.equals(b2));
assertFalse(b1.hashCode() == b2.hashCode());
b2.set(idx);
assertEquals(b1, b2);
assertEquals(b1.hashCode(), b2.hashCode());
}
}
}
public void testSmallBitSets() {
// Make sure size 0-10 bit sets are OK:
for(int numBits=0;numBits<10;numBits++) {
LongBitSet b1 = new LongBitSet(numBits);
LongBitSet b2 = new LongBitSet(numBits);
assertTrue(b1.equals(b2));
assertEquals(b1.hashCode(), b2.hashCode());
assertEquals(0, b1.cardinality());
if (numBits > 0) {
b1.set(0, numBits);
assertEquals(numBits, b1.cardinality());
b1.flip(0, numBits);
assertEquals(0, b1.cardinality());
}
}
}
private LongBitSet makeLongFixedBitSet(int[] a, int numBits) {
LongBitSet bs;
if (random().nextBoolean()) {
int bits2words = LongBitSet.bits2words(numBits);
long[] words = new long[bits2words + random().nextInt(100)];
for (int i = bits2words; i < words.length; i++) {
words[i] = random().nextLong();
}
bs = new LongBitSet(words, numBits);
} else {
bs = new LongBitSet(numBits);
}
for (int e: a) {
bs.set(e);
}
return bs;
}
private BitSet makeBitSet(int[] a) {
BitSet bs = new BitSet();
for (int e: a) {
bs.set(e);
}
return bs;
}
private void checkPrevSetBitArray(int [] a, int numBits) {
LongBitSet obs = makeLongFixedBitSet(a, numBits);
BitSet bs = makeBitSet(a);
doPrevSetBit(bs, obs);
}
public void testPrevSetBit() {
checkPrevSetBitArray(new int[] {}, 0);
checkPrevSetBitArray(new int[] {0}, 1);
checkPrevSetBitArray(new int[] {0,2}, 3);
}
private void checkNextSetBitArray(int [] a, int numBits) {
LongBitSet obs = makeLongFixedBitSet(a, numBits);
BitSet bs = makeBitSet(a);
doNextSetBit(bs, obs);
}
public void testNextBitSet() {
int[] setBits = new int[0+random().nextInt(1000)];
for (int i = 0; i < setBits.length; i++) {
setBits[i] = random().nextInt(setBits.length);
}
checkNextSetBitArray(setBits, setBits.length + random().nextInt(10));
checkNextSetBitArray(new int[0], setBits.length + random().nextInt(10));
}
public void testEnsureCapacity() {
LongBitSet bits = new LongBitSet(5);
bits.set(1);
bits.set(4);
LongBitSet newBits = LongBitSet.ensureCapacity(bits, 8); // grow within the word
assertTrue(newBits.get(1));
assertTrue(newBits.get(4));
newBits.clear(1);
// we align to 64-bits, so even though it shouldn't have, it re-allocated a long[1]
assertTrue(bits.get(1));
assertFalse(newBits.get(1));
newBits.set(1);
newBits = LongBitSet.ensureCapacity(newBits, newBits.length() - 2); // reuse
assertTrue(newBits.get(1));
bits.set(1);
newBits = LongBitSet.ensureCapacity(bits, 72); // grow beyond one word
assertTrue(newBits.get(1));
assertTrue(newBits.get(4));
newBits.clear(1);
// we grew the long[], so it's not shared
assertTrue(bits.get(1));
assertFalse(newBits.get(1));
}
}

View File

@ -221,7 +221,7 @@ public class TestNumericUtils extends LuceneTestCase {
final boolean useBitSet, final Iterable<Long> expectedBounds, final Iterable<Integer> expectedShifts
) {
// Cannot use FixedBitSet since the range could be long:
final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
final LongBitSet bits=useBitSet ? new LongBitSet(upper-lower+1) : null;
final Iterator<Long> neededBounds = (expectedBounds == null) ? null : expectedBounds.iterator();
final Iterator<Integer> neededShifts = (expectedShifts == null) ? null : expectedShifts.iterator();

View File

@ -17,6 +17,8 @@ package org.apache.lucene.queries;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
@ -24,10 +26,7 @@ import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import java.io.IOException;
import org.apache.lucene.util.FixedBitSet;
/**
* <p>
@ -127,23 +126,17 @@ public class ChainedFilter extends Filter {
}
}
private OpenBitSetDISI initialResult(AtomicReaderContext context, int logic, int[] index)
private FixedBitSet initialResult(AtomicReaderContext context, int logic, int[] index)
throws IOException {
AtomicReader reader = context.reader();
OpenBitSetDISI result;
/**
* First AND operation takes place against a completely false
* bitset and will always return zero results.
*/
FixedBitSet result = new FixedBitSet(reader.maxDoc());
if (logic == AND) {
result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc());
result.or(getDISI(chain[index[0]], context));
++index[0];
} else if (logic == ANDNOT) {
result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc());
result.or(getDISI(chain[index[0]], context));
result.flip(0, reader.maxDoc()); // NOTE: may set bits for deleted docs.
++index[0];
} else {
result = new OpenBitSetDISI(reader.maxDoc());
}
return result;
}
@ -157,7 +150,7 @@ public class ChainedFilter extends Filter {
*/
private DocIdSet getDocIdSet(AtomicReaderContext context, int logic, int[] index)
throws IOException {
OpenBitSetDISI result = initialResult(context, logic, index);
FixedBitSet result = initialResult(context, logic, index);
for (; index[0] < chain.length; index[0]++) {
// we dont pass acceptDocs, we will filter at the end using an additional filter
doChain(result, logic, chain[index[0]].getDocIdSet(context, null));
@ -178,7 +171,7 @@ public class ChainedFilter extends Filter {
throw new IllegalArgumentException("Invalid number of elements in logic array");
}
OpenBitSetDISI result = initialResult(context, logic[0], index);
FixedBitSet result = initialResult(context, logic[0], index);
for (; index[0] < chain.length; index[0]++) {
// we dont pass acceptDocs, we will filter at the end using an additional filter
doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context, null));
@ -198,23 +191,21 @@ public class ChainedFilter extends Filter {
return sb.toString();
}
private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
throws IOException {
if (dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
private void doChain(FixedBitSet result, int logic, DocIdSet dis) throws IOException {
if (dis instanceof FixedBitSet) {
// optimized case for FixedBitSets
switch (logic) {
case OR:
result.or((OpenBitSet) dis);
result.or((FixedBitSet) dis);
break;
case AND:
result.and((OpenBitSet) dis);
result.and((FixedBitSet) dis);
break;
case ANDNOT:
result.andNot((OpenBitSet) dis);
result.andNot((FixedBitSet) dis);
break;
case XOR:
result.xor((OpenBitSet) dis);
result.xor((FixedBitSet) dis);
break;
default:
doChain(result, DEFAULT, dis);
@ -233,16 +224,16 @@ public class ChainedFilter extends Filter {
switch (logic) {
case OR:
result.inPlaceOr(disi);
result.or(disi);
break;
case AND:
result.inPlaceAnd(disi);
result.and(disi);
break;
case ANDNOT:
result.inPlaceNot(disi);
result.andNot(disi);
break;
case XOR:
result.inPlaceXor(disi);
result.xor(disi);
break;
default:
doChain(result, DEFAULT, dis);

View File

@ -83,7 +83,7 @@ public abstract class AbstractVisitingPrefixTreeFilter extends AbstractPrefixTre
* The {@link #getDocIdSet()} method here starts the work. It first checks
* that there are indexed terms; if not it quickly returns null. Then it calls
* {@link #start()} so a subclass can set up a return value, like an
* {@link org.apache.lucene.util.OpenBitSet}. Then it starts the traversal
* {@link org.apache.lucene.util.FixedBitSet}. Then it starts the traversal
* process, calling {@link #findSubCellsToVisit(org.apache.lucene.spatial.prefix.tree.Cell)}
* which by default finds the top cells that intersect {@code queryShape}. If
* there isn't an indexed cell for a corresponding cell returned for this

View File

@ -36,8 +36,7 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.LongBitSet;
/**
* Just like {@link Lucene45DocValuesFormat} but with additional asserts.
@ -147,7 +146,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
int docCount = 0;
long ordCount = 0;
OpenBitSet seenOrds = new OpenBitSet(valueCount);
LongBitSet seenOrds = new LongBitSet(valueCount);
Iterator<Number> ordIterator = ords.iterator();
for (Number v : docToOrdCount) {
assert v != null;