mirror of https://github.com/apache/lucene.git
LUCENE-5440: Add LongBitSet to handle large number of bits; replace usage of OpenBitSet by FixedBitSet/LongBitSet
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1566662 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d95750d27d
commit
b872f4b5c3
|
@ -154,6 +154,9 @@ New Features
|
|||
are out of bounds, e.g. using a bounding box filter with distance
|
||||
range faceting. (Mike McCandless)
|
||||
|
||||
* LUCENE-5440: Add LongBitSet for managing more than 2.1B bits (otherwise use
|
||||
FixedBitSet). (Shai Erera)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;
|
||||
|
|
|
@ -27,8 +27,8 @@ import java.util.TreeMap;
|
|||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -38,9 +38,9 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
|
@ -54,6 +54,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
private final TreeMap<String,Long> fields;
|
||||
private final IndexInput in;
|
||||
private final FieldInfos fieldInfos;
|
||||
private final int maxDoc;
|
||||
|
||||
final static BytesRef END = SimpleTextFieldsWriter.END;
|
||||
final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
|
||||
|
@ -66,6 +67,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
|
||||
|
||||
public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
|
||||
this.maxDoc = state.segmentInfo.getDocCount();
|
||||
fieldInfos = state.fieldInfos;
|
||||
in = state.directory.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
|
||||
boolean success = false;
|
||||
|
@ -492,6 +494,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
private class SimpleTextTerms extends Terms {
|
||||
private final long termsStart;
|
||||
private final FieldInfo fieldInfo;
|
||||
private final int maxDoc;
|
||||
private long sumTotalTermFreq;
|
||||
private long sumDocFreq;
|
||||
private int docCount;
|
||||
|
@ -500,7 +503,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
private final BytesRef scratch = new BytesRef(10);
|
||||
private final CharsRef scratchUTF16 = new CharsRef(10);
|
||||
|
||||
public SimpleTextTerms(String field, long termsStart) throws IOException {
|
||||
public SimpleTextTerms(String field, long termsStart, int maxDoc) throws IOException {
|
||||
this.maxDoc = maxDoc;
|
||||
this.termsStart = termsStart;
|
||||
fieldInfo = fieldInfos.fieldInfo(field);
|
||||
loadTerms();
|
||||
|
@ -519,7 +523,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
long lastDocsStart = -1;
|
||||
int docFreq = 0;
|
||||
long totalTermFreq = 0;
|
||||
OpenBitSet visitedDocs = new OpenBitSet();
|
||||
FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
|
||||
final IntsRef scratchIntsRef = new IntsRef();
|
||||
while(true) {
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
|
@ -639,7 +643,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
if (fp == null) {
|
||||
return null;
|
||||
} else {
|
||||
terms = new SimpleTextTerms(field, fp);
|
||||
terms = new SimpleTextTerms(field, fp, maxDoc);
|
||||
termsCache.put(field, (SimpleTextTerms) terms);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
|
||||
/**
|
||||
* Abstract API that consumes numeric, binary and
|
||||
|
@ -285,7 +285,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
if (liveDocs == null) {
|
||||
liveTerms[sub] = dv.termsEnum();
|
||||
} else {
|
||||
OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
|
||||
LongBitSet bitset = new LongBitSet(dv.getValueCount());
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
if (liveDocs.get(i)) {
|
||||
int ord = dv.getOrd(i);
|
||||
|
@ -420,7 +420,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
if (liveDocs == null) {
|
||||
liveTerms[sub] = dv.termsEnum();
|
||||
} else {
|
||||
OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
|
||||
LongBitSet bitset = new LongBitSet(dv.getValueCount());
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
if (liveDocs.get(i)) {
|
||||
dv.setDocument(i);
|
||||
|
@ -625,9 +625,9 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
|
||||
// TODO: seek-by-ord to nextSetBit
|
||||
static class BitsFilteredTermsEnum extends FilteredTermsEnum {
|
||||
final OpenBitSet liveTerms;
|
||||
final LongBitSet liveTerms;
|
||||
|
||||
BitsFilteredTermsEnum(TermsEnum in, OpenBitSet liveTerms) {
|
||||
BitsFilteredTermsEnum(TermsEnum in, LongBitSet liveTerms) {
|
||||
super(in, false); // <-- not passing false here wasted about 3 hours of my time!!!!!!!!!!!!!
|
||||
assert liveTerms != null;
|
||||
this.liveTerms = liveTerms;
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.store.DataOutput;
|
|||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
|
||||
|
@ -48,7 +48,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
|||
|
||||
private final Counter iwBytesUsed;
|
||||
private final AppendingDeltaPackedLongBuffer lengths;
|
||||
private final OpenBitSet docsWithField;
|
||||
private FixedBitSet docsWithField;
|
||||
private final FieldInfo fieldInfo;
|
||||
private int addedValues;
|
||||
private long bytesUsed;
|
||||
|
@ -59,7 +59,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
|||
this.bytesOut = bytes.getDataOutput();
|
||||
this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
|
||||
this.iwBytesUsed = iwBytesUsed;
|
||||
this.docsWithField = new OpenBitSet();
|
||||
this.docsWithField = new FixedBitSet(64);
|
||||
this.bytesUsed = docsWithFieldBytesUsed();
|
||||
iwBytesUsed.addAndGet(bytesUsed);
|
||||
}
|
||||
|
@ -88,6 +88,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
|||
// Should never happen!
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
|
||||
docsWithField.set(docID);
|
||||
updateBytesUsed();
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CommandLineUtil;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
|
@ -1392,7 +1392,7 @@ public class CheckIndex {
|
|||
|
||||
private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField) {
|
||||
final long maxOrd = dv.getValueCount()-1;
|
||||
OpenBitSet seenOrds = new OpenBitSet(dv.getValueCount());
|
||||
LongBitSet seenOrds = new LongBitSet(dv.getValueCount());
|
||||
long maxOrd2 = -1;
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
dv.setDocument(i);
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.NoSuchElementException;
|
|||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
@ -37,18 +37,16 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
|||
private AppendingDeltaPackedLongBuffer pending;
|
||||
private final Counter iwBytesUsed;
|
||||
private long bytesUsed;
|
||||
private final OpenBitSet docsWithField;
|
||||
private FixedBitSet docsWithField;
|
||||
private final FieldInfo fieldInfo;
|
||||
private final boolean trackDocsWithField;
|
||||
|
||||
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, boolean trackDocsWithField) {
|
||||
pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
|
||||
docsWithField = new OpenBitSet();
|
||||
docsWithField = trackDocsWithField ? new FixedBitSet(64) : null;
|
||||
bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.iwBytesUsed = iwBytesUsed;
|
||||
iwBytesUsed.addAndGet(bytesUsed);
|
||||
this.trackDocsWithField = trackDocsWithField;
|
||||
}
|
||||
|
||||
public void addValue(int docID, long value) {
|
||||
|
@ -62,7 +60,8 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
|||
}
|
||||
|
||||
pending.add(value);
|
||||
if (trackDocsWithField) {
|
||||
if (docsWithField != null) {
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
|
||||
docsWithField.set(docID);
|
||||
}
|
||||
|
||||
|
@ -71,7 +70,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
|||
|
||||
private long docsWithFieldBytesUsed() {
|
||||
// size of the long[] + some overhead
|
||||
return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
|
||||
return docsWithField == null ? 0 : RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
|
||||
}
|
||||
|
||||
private void updateBytesUsed() {
|
||||
|
@ -126,13 +125,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
|||
Long value;
|
||||
if (upto < size) {
|
||||
long v = iter.next();
|
||||
if (!trackDocsWithField || docsWithField.get(upto)) {
|
||||
if (docsWithField == null || docsWithField.get(upto)) {
|
||||
value = v;
|
||||
} else {
|
||||
value = null;
|
||||
}
|
||||
} else {
|
||||
value = trackDocsWithField ? null : MISSING;
|
||||
value = docsWithField != null ? null : MISSING;
|
||||
}
|
||||
upto++;
|
||||
return value;
|
||||
|
|
|
@ -2,10 +2,8 @@ package org.apache.lucene.index;
|
|||
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PagedGrowableWriter;
|
||||
import org.apache.lucene.util.packed.PagedMutable;
|
||||
|
@ -98,11 +96,7 @@ interface NumericFieldUpdates {
|
|||
if (docs.size() == size) {
|
||||
docs = docs.grow(size + 1);
|
||||
values = values.grow(size + 1);
|
||||
int numWords = (int) (docs.size() >> 6);
|
||||
if (docsWithField.getBits().length <= numWords) {
|
||||
numWords = ArrayUtil.oversize(numWords + 1, RamUsageEstimator.NUM_BYTES_LONG);
|
||||
docsWithField = new FixedBitSet(docsWithField, numWords << 6);
|
||||
}
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, (int) docs.size());
|
||||
}
|
||||
|
||||
if (value != NumericUpdate.MISSING) {
|
||||
|
@ -208,11 +202,7 @@ interface NumericFieldUpdates {
|
|||
}
|
||||
docs = docs.grow(size + packedOther.size);
|
||||
values = values.grow(size + packedOther.size);
|
||||
int numWords = (int) (docs.size() >> 6);
|
||||
if (docsWithField.getBits().length <= numWords) {
|
||||
numWords = ArrayUtil.oversize(numWords + 1, RamUsageEstimator.NUM_BYTES_LONG);
|
||||
docsWithField = new FixedBitSet(docsWithField, numWords << 6);
|
||||
}
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, (int) docs.size());
|
||||
for (int i = 0; i < packedOther.size; i++) {
|
||||
int doc = (int) packedOther.docs.get(i);
|
||||
if (packedOther.docsWithField.get(i)) {
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.index.SortedSetDocValues;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
|
||||
/**
|
||||
* Rewrites MultiTermQueries into a filter, using DocTermOrds for term enumeration.
|
||||
|
@ -85,7 +85,7 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
|
||||
final SortedSetDocValues docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), query.field);
|
||||
// Cannot use FixedBitSet because we require long index (ord):
|
||||
final OpenBitSet termSet = new OpenBitSet(docTermOrds.getValueCount());
|
||||
final LongBitSet termSet = new LongBitSet(docTermOrds.getValueCount());
|
||||
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
|
||||
|
||||
@Override
|
||||
|
@ -136,7 +136,7 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
|
||||
assert termsEnum != null;
|
||||
if (termsEnum.next() != null) {
|
||||
// fill into a OpenBitSet
|
||||
// fill into a bitset
|
||||
do {
|
||||
termSet.set(termsEnum.ord());
|
||||
} while (termsEnum.next() != null);
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.index.SortedDocValues;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
|
||||
/**
|
||||
* Rewrites MultiTermQueries into a filter, using the FieldCache for term enumeration.
|
||||
|
@ -85,7 +85,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
|
||||
final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), query.field);
|
||||
// Cannot use FixedBitSet because we require long index (ord):
|
||||
final OpenBitSet termSet = new OpenBitSet(fcsi.getValueCount());
|
||||
final LongBitSet termSet = new LongBitSet(fcsi.getValueCount());
|
||||
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
|
||||
|
||||
@Override
|
||||
|
@ -136,7 +136,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
|
||||
assert termsEnum != null;
|
||||
if (termsEnum.next() != null) {
|
||||
// fill into a OpenBitSet
|
||||
// fill into a bitset
|
||||
do {
|
||||
long ord = termsEnum.ord();
|
||||
if (ord >= 0) {
|
||||
|
|
|
@ -27,7 +27,7 @@ import java.util.LinkedHashMap;
|
|||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
final class SloppyPhraseScorer extends Scorer {
|
||||
private PhrasePositions min, max;
|
||||
|
@ -152,7 +152,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
return true; // not a repeater
|
||||
}
|
||||
PhrasePositions[] rg = rptGroups[pp.rptGroup];
|
||||
OpenBitSet bits = new OpenBitSet(rg.length); // for re-queuing after collisions are resolved
|
||||
FixedBitSet bits = new FixedBitSet(rg.length); // for re-queuing after collisions are resolved
|
||||
int k0 = pp.rptInd;
|
||||
int k;
|
||||
while((k=collide(pp)) >= 0) {
|
||||
|
@ -167,6 +167,9 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
// collisions resolved, now re-queue
|
||||
// empty (partially) the queue until seeing all pps advanced for resolving collisions
|
||||
int n = 0;
|
||||
// TODO can't this be checked once and decremented as we clear bits?
|
||||
// in fact, we don't even need to clear any bits, since the bitset is totally discarded
|
||||
// only need to pop as many set bits from the pq.
|
||||
while (bits.cardinality() > 0) {
|
||||
PhrasePositions pp2 = pq.pop();
|
||||
rptStack[n++] = pp2;
|
||||
|
@ -405,7 +408,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
} else {
|
||||
// more involved - has multi-terms
|
||||
ArrayList<HashSet<PhrasePositions>> tmp = new ArrayList<HashSet<PhrasePositions>>();
|
||||
ArrayList<OpenBitSet> bb = ppTermsBitSets(rpp, rptTerms);
|
||||
ArrayList<FixedBitSet> bb = ppTermsBitSets(rpp, rptTerms);
|
||||
unionTermGroups(bb);
|
||||
HashMap<Term,Integer> tg = termGroups(rptTerms, bb);
|
||||
HashSet<Integer> distinctGroupIDs = new HashSet<Integer>(tg.values());
|
||||
|
@ -467,10 +470,10 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
}
|
||||
|
||||
/** bit-sets - for each repeating pp, for each of its repeating terms, the term ordinal values is set */
|
||||
private ArrayList<OpenBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
|
||||
ArrayList<OpenBitSet> bb = new ArrayList<OpenBitSet>(rpp.length);
|
||||
private ArrayList<FixedBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
|
||||
ArrayList<FixedBitSet> bb = new ArrayList<FixedBitSet>(rpp.length);
|
||||
for (PhrasePositions pp : rpp) {
|
||||
OpenBitSet b = new OpenBitSet(tord.size());
|
||||
FixedBitSet b = new FixedBitSet(tord.size());
|
||||
Integer ord;
|
||||
for (Term t: pp.terms) {
|
||||
if ((ord=tord.get(t))!=null) {
|
||||
|
@ -483,14 +486,14 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
}
|
||||
|
||||
/** union (term group) bit-sets until they are disjoint (O(n^^2)), and each group have different terms */
|
||||
private void unionTermGroups(ArrayList<OpenBitSet> bb) {
|
||||
private void unionTermGroups(ArrayList<FixedBitSet> bb) {
|
||||
int incr;
|
||||
for (int i=0; i<bb.size()-1; i+=incr) {
|
||||
incr = 1;
|
||||
int j = i+1;
|
||||
while (j<bb.size()) {
|
||||
if (bb.get(i).intersects(bb.get(j))) {
|
||||
bb.get(i).union(bb.get(j));
|
||||
bb.get(i).or(bb.get(j));
|
||||
bb.remove(j);
|
||||
incr = 0;
|
||||
} else {
|
||||
|
@ -501,7 +504,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
}
|
||||
|
||||
/** map each term to the single group that contains it */
|
||||
private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<OpenBitSet> bb) throws IOException {
|
||||
private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<FixedBitSet> bb) throws IOException {
|
||||
HashMap<Term,Integer> tg = new HashMap<Term,Integer>();
|
||||
Term[] t = tord.keySet().toArray(new Term[0]);
|
||||
for (int i=0; i<bb.size(); i++) { // i is the group no.
|
||||
|
|
|
@ -23,23 +23,114 @@ import java.util.Arrays;
|
|||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
// TODO: maybe merge with BitVector? Problem is BitVector
|
||||
// caches its cardinality...
|
||||
|
||||
/** BitSet of fixed length (numBits), backed by accessible
|
||||
* ({@link #getBits}) long[], accessed with an int index,
|
||||
* implementing Bits and DocIdSet. Unlike {@link
|
||||
* OpenBitSet} this bit set does not auto-expand, cannot
|
||||
* handle long index, and does not have fastXX/XX variants
|
||||
* (just X).
|
||||
*
|
||||
* @lucene.internal
|
||||
**/
|
||||
/**
|
||||
* BitSet of fixed length (numBits), backed by accessible ({@link #getBits})
|
||||
* long[], accessed with an int index, implementing {@link Bits} and
|
||||
* {@link DocIdSet}. If you need to manage more than 2.1B bits, use
|
||||
* {@link LongBitSet}.
|
||||
*/
|
||||
public final class FixedBitSet extends DocIdSet implements Bits {
|
||||
|
||||
/**
|
||||
* A {@link DocIdSetIterator} which iterates over set bits in a
|
||||
* {@link FixedBitSet}.
|
||||
*/
|
||||
public static final class FixedBitSetIterator extends DocIdSetIterator {
|
||||
|
||||
final int numBits, numWords;
|
||||
final long[] bits;
|
||||
int doc = -1;
|
||||
|
||||
public FixedBitSetIterator(long[] bits, int numBits, int wordLength) {
|
||||
this.bits = bits;
|
||||
this.numBits = numBits;
|
||||
this.numWords = wordLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (doc == NO_MORE_DOCS || ++doc >= numBits) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
int i = doc >> 6;
|
||||
final int subIndex = doc & 0x3f; // index within the word
|
||||
long word = bits[i] >> subIndex; // skip all the bits to the right of index
|
||||
|
||||
if (word != 0) {
|
||||
return doc = doc + Long.numberOfTrailingZeros(word);
|
||||
}
|
||||
|
||||
while (++i < numWords) {
|
||||
word = bits[i];
|
||||
if (word != 0) {
|
||||
return doc = (i << 6) + Long.numberOfTrailingZeros(word);
|
||||
}
|
||||
}
|
||||
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return numBits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (doc == NO_MORE_DOCS || target >= numBits) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
int i = target >> 6;
|
||||
final int subIndex = target & 0x3f; // index within the word
|
||||
long word = bits[i] >> subIndex; // skip all the bits to the right of index
|
||||
|
||||
if (word != 0) {
|
||||
return doc = target + Long.numberOfTrailingZeros(word);
|
||||
}
|
||||
|
||||
while (++i < numWords) {
|
||||
word = bits[i];
|
||||
if (word != 0) {
|
||||
return doc = (i << 6) + Long.numberOfTrailingZeros(word);
|
||||
}
|
||||
}
|
||||
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
|
||||
private final long[] bits;
|
||||
private final int numBits;
|
||||
private final int wordLength;
|
||||
|
||||
/**
|
||||
* If the given {@link FixedBitSet} is large enough to hold {@code numBits},
|
||||
* returns the given bits, otherwise returns a new {@link FixedBitSet} which
|
||||
* can hold the requested number of bits.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of
|
||||
* the given {@code bits} if possible. Also, calling {@link #length()} on the
|
||||
* returned bits may return a value greater than {@code numBits}.
|
||||
*/
|
||||
public static FixedBitSet ensureCapacity(FixedBitSet bits, int numBits) {
|
||||
if (numBits < bits.length()) {
|
||||
return bits;
|
||||
} else {
|
||||
int numWords = bits2words(numBits);
|
||||
long[] arr = bits.getBits();
|
||||
if (numWords >= arr.length) {
|
||||
arr = ArrayUtil.grow(arr, numWords + 1);
|
||||
}
|
||||
return new FixedBitSet(arr, arr.length << 6);
|
||||
}
|
||||
}
|
||||
|
||||
/** returns the number of 64 bit words it would take to hold numBits */
|
||||
public static int bits2words(int numBits) {
|
||||
int numLong = numBits >>> 6;
|
||||
|
@ -64,82 +155,9 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
this.bits = storedBits;
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a full copy of the bits, while allowing to expand/shrink the bitset.
|
||||
* If {@code numBits < other.numBits}, then only the first {@code numBits}
|
||||
* are copied from other.
|
||||
*/
|
||||
public FixedBitSet(FixedBitSet other, int numBits) {
|
||||
wordLength = bits2words(numBits);
|
||||
bits = new long[wordLength];
|
||||
System.arraycopy(other.bits, 0, bits, 0, Math.min(other.wordLength, wordLength));
|
||||
this.numBits = numBits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
// define locally so we don't have "enclosing acces" issue
|
||||
final long[] bits = this.bits;
|
||||
final int wordLength = this.wordLength;
|
||||
final int numBits = this.numBits;
|
||||
return new DocIdSetIterator() {
|
||||
int doc = -1;
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (doc == NO_MORE_DOCS || ++doc >= numBits) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
int i = doc >> 6;
|
||||
final int subIndex = doc & 0x3f; // index within the word
|
||||
long word = bits[i] >> subIndex; // skip all the bits to the right of index
|
||||
|
||||
if (word != 0) {
|
||||
return doc = doc + Long.numberOfTrailingZeros(word);
|
||||
}
|
||||
|
||||
while (++i < wordLength) {
|
||||
word = bits[i];
|
||||
if (word != 0) {
|
||||
return doc = (i << 6) + Long.numberOfTrailingZeros(word);
|
||||
}
|
||||
}
|
||||
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return bits.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (doc == NO_MORE_DOCS || target >= numBits) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
int i = target >> 6;
|
||||
final int subIndex = target & 0x3f; // index within the word
|
||||
long word = bits[i] >> subIndex; // skip all the bits to the right of index
|
||||
|
||||
if (word != 0) {
|
||||
return doc = target + Long.numberOfTrailingZeros(word);
|
||||
}
|
||||
|
||||
while (++i < wordLength) {
|
||||
word = bits[i];
|
||||
if (word != 0) {
|
||||
return doc = (i << 6) + Long.numberOfTrailingZeros(word);
|
||||
}
|
||||
}
|
||||
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
};
|
||||
return new FixedBitSetIterator(bits, numBits, wordLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -272,6 +290,12 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
obs.advance(numBits);
|
||||
} else if (iter instanceof FixedBitSetIterator && iter.docID() == -1) {
|
||||
final FixedBitSetIterator fbs = (FixedBitSetIterator) iter;
|
||||
or(fbs.bits, fbs.numWords);
|
||||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
fbs.advance(numBits);
|
||||
} else {
|
||||
int doc;
|
||||
while ((doc = iter.nextDoc()) < numBits) {
|
||||
|
@ -292,6 +316,24 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
thisArr[pos] |= otherArr[pos];
|
||||
}
|
||||
}
|
||||
|
||||
/** this = this XOR other */
|
||||
public void xor(FixedBitSet other) {
|
||||
final long[] thisBits = this.bits;
|
||||
final long[] otherBits = other.bits;
|
||||
int pos = Math.min(wordLength, other.wordLength);
|
||||
while (--pos >= 0) {
|
||||
thisBits[pos] ^= otherBits[pos];
|
||||
}
|
||||
}
|
||||
|
||||
/** Does in-place XOR of the bits provided by the iterator. */
|
||||
public void xor(DocIdSetIterator iter) throws IOException {
|
||||
int doc;
|
||||
while ((doc = iter.nextDoc()) < numBits) {
|
||||
flip(doc, doc + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/** Does in-place AND of the bits provided by the
|
||||
* iterator. */
|
||||
|
@ -302,6 +344,12 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
obs.advance(numBits);
|
||||
} else if (iter instanceof FixedBitSetIterator && iter.docID() == -1) {
|
||||
final FixedBitSetIterator fbs = (FixedBitSetIterator) iter;
|
||||
and(fbs.bits, fbs.numWords);
|
||||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
fbs.advance(numBits);
|
||||
} else {
|
||||
if (numBits == 0) return;
|
||||
int disiDoc, bitSetDoc = nextSetBit(0);
|
||||
|
@ -316,6 +364,15 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
}
|
||||
}
|
||||
|
||||
/** returns true if the sets have any elements in common */
|
||||
public boolean intersects(FixedBitSet other) {
|
||||
int pos = Math.min(wordLength, other.wordLength);
|
||||
while (--pos>=0) {
|
||||
if ((bits[pos] & other.bits[pos]) != 0) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** this = this AND other */
|
||||
public void and(FixedBitSet other) {
|
||||
and(other.bits, other.wordLength);
|
||||
|
@ -341,6 +398,12 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
obs.advance(numBits);
|
||||
} else if (iter instanceof FixedBitSetIterator && iter.docID() == -1) {
|
||||
final FixedBitSetIterator fbs = (FixedBitSetIterator) iter;
|
||||
andNot(fbs.bits, fbs.numWords);
|
||||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
fbs.advance(numBits);
|
||||
} else {
|
||||
int doc;
|
||||
while ((doc = iter.nextDoc()) < numBits) {
|
||||
|
@ -467,7 +530,9 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
|
||||
@Override
|
||||
public FixedBitSet clone() {
|
||||
return new FixedBitSet(this, numBits);
|
||||
long[] bits = new long[this.bits.length];
|
||||
System.arraycopy(this.bits, 0, bits, 0, bits.length);
|
||||
return new FixedBitSet(bits, numBits);
|
||||
}
|
||||
|
||||
/** returns true if both sets have the same bits set */
|
||||
|
|
|
@ -0,0 +1,370 @@
|
|||
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.util;

import java.util.Arrays;

/**
 * BitSet of fixed length (numBits), backed by accessible ({@link #getBits})
 * long[], accessed with a long index. Use it only if you intend to store more
 * than 2.1B bits, otherwise you should use {@link FixedBitSet}.
 */
public final class LongBitSet {

  // Backing words: bit {@code i} lives in bits[(int)(i >> 6)] at position (i & 63).
  private final long[] bits;
  // Logical size of the set; the backing array may cover more bits than this.
  private final long numBits;
  // Number of backing words covered by numBits; words beyond this index are
  // "ghost" words whose content is undefined and must be ignored.
  private final int numWords;

  /**
   * If the given {@link LongBitSet} is large enough to hold
   * {@code numBits}, returns the given bits, otherwise returns a new
   * {@link LongBitSet} which can hold the requested number of bits.
   *
   * <p>
   * <b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of
   * the given {@code bits} if possible. Also, calling {@link #length()} on the
   * returned bits may return a value greater than {@code numBits}.
   */
  public static LongBitSet ensureCapacity(LongBitSet bits, long numBits) {
    if (numBits < bits.length()) {
      return bits;
    } else {
      int numWords = bits2words(numBits);
      long[] arr = bits.getBits();
      if (numWords >= arr.length) {
        arr = ArrayUtil.grow(arr, numWords + 1);
      }
      // NOTE: cast before shifting — (arr.length << 6) is evaluated in int
      // arithmetic and overflows once arr.length >= 2^25 (i.e. >2.1B bits),
      // which is precisely the regime this class is meant for.
      return new LongBitSet(arr, (long) arr.length << 6);
    }
  }

  /** returns the number of 64 bit words it would take to hold numBits */
  public static int bits2words(long numBits) {
    int numLong = (int) (numBits >>> 6);
    if ((numBits & 63) != 0) {
      // round up: a partial trailing word still needs a full long
      numLong++;
    }
    return numLong;
  }

  /** Creates a new LongBitSet of {@code numBits} bits, all initially clear. */
  public LongBitSet(long numBits) {
    this.numBits = numBits;
    bits = new long[bits2words(numBits)];
    numWords = bits.length;
  }

  /**
   * Creates a LongBitSet wrapping the given {@code storedBits} array (no copy
   * is made). The array may be longer than needed; trailing words are ignored.
   *
   * @throws IllegalArgumentException if the array is too small for {@code numBits}
   */
  public LongBitSet(long[] storedBits, long numBits) {
    this.numWords = bits2words(numBits);
    if (numWords > storedBits.length) {
      throw new IllegalArgumentException("The given long array is too small to hold " + numBits + " bits");
    }
    this.numBits = numBits;
    this.bits = storedBits;
  }

  /** Returns the number of bits stored in this bitset. */
  public long length() {
    return numBits;
  }

  /** Expert. */
  public long[] getBits() {
    return bits;
  }

  /** Returns number of set bits.  NOTE: this visits every
   *  long in the backing bits array, and the result is not
   *  internally cached! */
  public long cardinality() {
    return BitUtil.pop_array(bits, 0, bits.length);
  }

  /** Returns true if the bit at {@code index} is set. */
  public boolean get(long index) {
    assert index >= 0 && index < numBits: "index=" + index;
    int i = (int) (index >> 6);               // div 64
    // signed shift will keep a negative index and force an
    // array-index-out-of-bounds-exception, removing the need for an explicit check.
    int bit = (int) (index & 0x3f);           // mod 64
    long bitmask = 1L << bit;
    return (bits[i] & bitmask) != 0;
  }

  /** Sets the bit at {@code index}. */
  public void set(long index) {
    assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits;
    int wordNum = (int) (index >> 6);      // div 64
    int bit = (int) (index & 0x3f);        // mod 64
    long bitmask = 1L << bit;
    bits[wordNum] |= bitmask;
  }

  /** Sets the bit at {@code index} and returns its previous value. */
  public boolean getAndSet(long index) {
    assert index >= 0 && index < numBits;
    int wordNum = (int) (index >> 6);      // div 64
    int bit = (int) (index & 0x3f);        // mod 64
    long bitmask = 1L << bit;
    boolean val = (bits[wordNum] & bitmask) != 0;
    bits[wordNum] |= bitmask;
    return val;
  }

  /** Clears the bit at {@code index}. */
  public void clear(long index) {
    assert index >= 0 && index < numBits;
    int wordNum = (int) (index >> 6);
    int bit = (int) (index & 0x03f);
    long bitmask = 1L << bit;
    bits[wordNum] &= ~bitmask;
  }

  /** Clears the bit at {@code index} and returns its previous value. */
  public boolean getAndClear(long index) {
    assert index >= 0 && index < numBits;
    int wordNum = (int) (index >> 6);      // div 64
    int bit = (int) (index & 0x3f);        // mod 64
    long bitmask = 1L << bit;
    boolean val = (bits[wordNum] & bitmask) != 0;
    bits[wordNum] &= ~bitmask;
    return val;
  }

  /** Returns the index of the first set bit starting at the index specified.
   *  -1 is returned if there are no more set bits.
   */
  public long nextSetBit(long index) {
    assert index >= 0 && index < numBits;
    int i = (int) (index >> 6);
    final int subIndex = (int) (index & 0x3f);      // index within the word
    long word = bits[i] >> subIndex;                // skip all the bits to the right of index

    if (word != 0) {
      return index + Long.numberOfTrailingZeros(word);
    }

    while (++i < numWords) {
      word = bits[i];
      if (word != 0) {
        // NOTE: widen before shifting — (i << 6) in int arithmetic overflows
        // for word indexes >= 2^25, i.e. bit positions beyond Integer.MAX_VALUE.
        return ((long) i << 6) + Long.numberOfTrailingZeros(word);
      }
    }

    return -1;
  }

  /** Returns the index of the last set bit before or on the index specified.
   *  -1 is returned if there are no more set bits.
   */
  public long prevSetBit(long index) {
    assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits;
    int i = (int) (index >> 6);
    final int subIndex = (int) (index & 0x3f);  // index within the word
    long word = (bits[i] << (63-subIndex));     // skip all the bits to the left of index

    if (word != 0) {
      // widen before shifting to avoid int overflow for large word indexes
      return ((long) i << 6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197
    }

    while (--i >= 0) {
      word = bits[i];
      if (word != 0) {
        return ((long) i << 6) + 63 - Long.numberOfLeadingZeros(word);
      }
    }

    return -1;
  }

  /** this = this OR other */
  public void or(LongBitSet other) {
    int pos = Math.min(numWords, other.numWords);
    while (--pos >= 0) {
      bits[pos] |= other.bits[pos];
    }
  }

  /** this = this XOR other */
  public void xor(LongBitSet other) {
    int pos = Math.min(numWords, other.numWords);
    while (--pos >= 0) {
      bits[pos] ^= other.bits[pos];
    }
  }

  /** returns true if the sets have any elements in common */
  public boolean intersects(LongBitSet other) {
    int pos = Math.min(numWords, other.numWords);
    while (--pos >= 0) {
      if ((bits[pos] & other.bits[pos]) != 0) return true;
    }
    return false;
  }

  /** this = this AND other */
  public void and(LongBitSet other) {
    int pos = Math.min(numWords, other.numWords);
    while (--pos >= 0) {
      bits[pos] &= other.bits[pos];
    }
    if (numWords > other.numWords) {
      // words past other's logical end are implicitly zero in other
      Arrays.fill(bits, other.numWords, numWords, 0L);
    }
  }

  /** this = this AND NOT other */
  public void andNot(LongBitSet other) {
    // NOTE: bound by other.numWords, not other.bits.length — the backing array
    // may contain ghost words past the logical size (the wrapping constructor
    // allows a longer array) whose undefined content must not clear our bits.
    // This also makes andNot consistent with and/or/xor above.
    int pos = Math.min(numWords, other.numWords);
    while (--pos >= 0) {
      bits[pos] &= ~other.bits[pos];
    }
  }

  // NOTE: no .isEmpty() here because that's trappy (ie,
  // typically isEmpty is low cost, but this one wouldn't
  // be)

  /** Flips a range of bits
   *
   * @param startIndex lower index
   * @param endIndex one-past the last bit to flip
   */
  public void flip(long startIndex, long endIndex) {
    assert startIndex >= 0 && startIndex < numBits;
    assert endIndex >= 0 && endIndex <= numBits;
    if (endIndex <= startIndex) {
      return;
    }

    int startWord = (int) (startIndex >> 6);
    int endWord = (int) ((endIndex-1) >> 6);

    /*** Grrr, java shifting wraps around so -1L>>>64 == -1
     * for that reason, make sure not to use endmask if the bits to flip will
     * be zero in the last word (redefine endWord to be the last changed...)
    long startmask = -1L << (startIndex & 0x3f);     // example: 11111...111000
    long endmask = -1L >>> (64-(endIndex & 0x3f));   // example: 00111...111111
    ***/

    long startmask = -1L << startIndex;
    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap

    if (startWord == endWord) {
      bits[startWord] ^= (startmask & endmask);
      return;
    }

    bits[startWord] ^= startmask;

    for (int i=startWord+1; i<endWord; i++) {
      bits[i] = ~bits[i];
    }

    bits[endWord] ^= endmask;
  }

  /** Sets a range of bits
   *
   * @param startIndex lower index
   * @param endIndex one-past the last bit to set
   */
  public void set(long startIndex, long endIndex) {
    assert startIndex >= 0 && startIndex < numBits;
    assert endIndex >= 0 && endIndex <= numBits;
    if (endIndex <= startIndex) {
      return;
    }

    int startWord = (int) (startIndex >> 6);
    int endWord = (int) ((endIndex-1) >> 6);

    long startmask = -1L << startIndex;
    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap

    if (startWord == endWord) {
      bits[startWord] |= (startmask & endmask);
      return;
    }

    bits[startWord] |= startmask;
    Arrays.fill(bits, startWord+1, endWord, -1L);
    bits[endWord] |= endmask;
  }

  /** Clears a range of bits.
   *
   * @param startIndex lower index
   * @param endIndex one-past the last bit to clear
   */
  public void clear(long startIndex, long endIndex) {
    assert startIndex >= 0 && startIndex < numBits;
    assert endIndex >= 0 && endIndex <= numBits;
    if (endIndex <= startIndex) {
      return;
    }

    int startWord = (int) (startIndex >> 6);
    int endWord = (int) ((endIndex-1) >> 6);

    long startmask = -1L << startIndex;
    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap

    // invert masks since we are clearing
    startmask = ~startmask;
    endmask = ~endmask;

    if (startWord == endWord) {
      bits[startWord] &= (startmask | endmask);
      return;
    }

    bits[startWord] &= startmask;
    Arrays.fill(bits, startWord+1, endWord, 0L);
    bits[endWord] &= endmask;
  }

  @Override
  public LongBitSet clone() {
    long[] bits = new long[this.bits.length];
    System.arraycopy(this.bits, 0, bits, 0, bits.length);
    return new LongBitSet(bits, numBits);
  }

  /** returns true if both sets have the same bits set */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof LongBitSet)) {
      return false;
    }
    LongBitSet other = (LongBitSet) o;
    if (numBits != other.length()) {
      return false;
    }
    return Arrays.equals(bits, other.bits);
  }

  @Override
  public int hashCode() {
    long h = 0;
    for (int i = numWords; --i>=0;) {
      h ^= bits[i];
      h = (h << 1) | (h >>> 63); // rotate left
    }
    // fold leftmost bits into right and add a constant to prevent
    // empty sets from returning 0, which is too common.
    return (int) ((h>>32) ^ h) + 0x98761234;
  }
}
|
|
@ -175,10 +175,12 @@ public class TestFixedBitSet extends BaseDocIdSetTestCase<FixedBitSet> {
|
|||
|
||||
BitSet a_and = (BitSet)a.clone(); a_and.and(a0);
|
||||
BitSet a_or = (BitSet)a.clone(); a_or.or(a0);
|
||||
BitSet a_xor = (BitSet)a.clone(); a_xor.xor(a0);
|
||||
BitSet a_andn = (BitSet)a.clone(); a_andn.andNot(a0);
|
||||
|
||||
FixedBitSet b_and = b.clone(); assertEquals(b,b_and); b_and.and(b0);
|
||||
FixedBitSet b_or = b.clone(); b_or.or(b0);
|
||||
FixedBitSet b_xor = b.clone(); b_xor.xor(b0);
|
||||
FixedBitSet b_andn = b.clone(); b_andn.andNot(b0);
|
||||
|
||||
assertEquals(a0.cardinality(), b0.cardinality());
|
||||
|
@ -187,9 +189,11 @@ public class TestFixedBitSet extends BaseDocIdSetTestCase<FixedBitSet> {
|
|||
doIterate(a_and,b_and, mode);
|
||||
doIterate(a_or,b_or, mode);
|
||||
doIterate(a_andn,b_andn, mode);
|
||||
|
||||
doIterate(a_xor,b_xor, mode);
|
||||
|
||||
assertEquals(a_and.cardinality(), b_and.cardinality());
|
||||
assertEquals(a_or.cardinality(), b_or.cardinality());
|
||||
assertEquals(a_xor.cardinality(), b_xor.cardinality());
|
||||
assertEquals(a_andn.cardinality(), b_andn.cardinality());
|
||||
}
|
||||
|
||||
|
@ -329,49 +333,31 @@ public class TestFixedBitSet extends BaseDocIdSetTestCase<FixedBitSet> {
|
|||
checkNextSetBitArray(new int[0], setBits.length + random().nextInt(10));
|
||||
}
|
||||
|
||||
public void testGrow() {
|
||||
public void testEnsureCapacity() {
|
||||
FixedBitSet bits = new FixedBitSet(5);
|
||||
bits.set(1);
|
||||
bits.set(4);
|
||||
|
||||
FixedBitSet newBits = new FixedBitSet(bits, 8); // grow within the word
|
||||
FixedBitSet newBits = FixedBitSet.ensureCapacity(bits, 8); // grow within the word
|
||||
assertTrue(newBits.get(1));
|
||||
assertTrue(newBits.get(4));
|
||||
newBits.clear(1);
|
||||
// we align to 64-bits, so even though it shouldn't have, it re-allocated a long[1]
|
||||
assertTrue(bits.get(1));
|
||||
assertFalse(newBits.get(1));
|
||||
|
||||
newBits = new FixedBitSet(bits, 72); // grow beyond one word
|
||||
newBits.set(1);
|
||||
newBits = FixedBitSet.ensureCapacity(newBits, newBits.length() - 2); // reuse
|
||||
assertTrue(newBits.get(1));
|
||||
assertTrue(newBits.get(4));
|
||||
}
|
||||
|
||||
public void testShrink() {
|
||||
FixedBitSet bits = new FixedBitSet(72);
|
||||
|
||||
bits.set(1);
|
||||
bits.set(4);
|
||||
bits.set(69);
|
||||
|
||||
FixedBitSet newBits = new FixedBitSet(bits, 66); // shrink within the word
|
||||
newBits = FixedBitSet.ensureCapacity(bits, 72); // grow beyond one word
|
||||
assertTrue(newBits.get(1));
|
||||
assertTrue(newBits.get(4));
|
||||
boolean hitError = true;
|
||||
try {
|
||||
newBits.get(69);
|
||||
hitError = false;
|
||||
} catch (AssertionError e) {
|
||||
hitError = true;
|
||||
}
|
||||
assertTrue(hitError);
|
||||
|
||||
newBits = new FixedBitSet(bits, 8); // shrink beyond one word
|
||||
assertTrue(newBits.get(1));
|
||||
assertTrue(newBits.get(4));
|
||||
hitError = true;
|
||||
try {
|
||||
newBits.get(69);
|
||||
hitError = false;
|
||||
} catch (AssertionError e) {
|
||||
hitError = true;
|
||||
}
|
||||
assertTrue(hitError);
|
||||
newBits.clear(1);
|
||||
// we grew the long[], so it's not shared
|
||||
assertTrue(bits.get(1));
|
||||
assertFalse(newBits.get(1));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,320 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
 * Randomized tests for {@link LongBitSet}, cross-checked bit-for-bit against
 * {@link java.util.BitSet}: every mutation is applied to both a BitSet mirror
 * and the LongBitSet under test, then the two are compared.
 */
public class TestLongBitSet extends LuceneTestCase {

  /** Asserts that every bit of {@code b} matches the mirror {@code a}. */
  void doGet(BitSet a, LongBitSet b) {
    long max = b.length();
    for (int i=0; i<max; i++) {
      if (a.get(i) != b.get(i)) {
        fail("mismatch: BitSet=["+i+"]="+a.get(i));
      }
    }
  }

  /** Walks forward through all set bits of both sets, asserting the iteration orders agree. */
  void doNextSetBit(BitSet a, LongBitSet b) {
    int aa=-1;
    long bb=-1;
    do {
      aa = a.nextSetBit(aa+1);
      // LongBitSet.nextSetBit asserts index < numBits, so guard the upper bound
      bb = bb < b.length()-1 ? b.nextSetBit(bb+1) : -1;
      assertEquals(aa,bb);
    } while (aa>=0);
  }

  /** Walks backward through all set bits of both sets, asserting the iteration orders agree.
   *  BitSet has no prevSetBit here, so the mirror is scanned manually. */
  void doPrevSetBit(BitSet a, LongBitSet b) {
    // start past the end on purpose to exercise the clamping branches below
    int aa = a.size() + random().nextInt(100);
    long bb = aa;
    do {
      // aa = a.prevSetBit(aa-1);
      aa--;
      while ((aa >= 0) && (! a.get(aa))) {
        aa--;
      }
      // clamp bb into the valid range for LongBitSet.prevSetBit, which
      // asserts 0 <= index < numBits
      if (b.length() == 0) {
        bb = -1;
      } else if (bb > b.length()-1) {
        bb = b.prevSetBit(b.length()-1);
      } else if (bb < 1) {
        bb = -1;
      } else {
        bb = bb >= 1 ? b.prevSetBit(bb-1) : -1;
      }
      assertEquals(aa,bb);
    } while (aa>=0);
  }

  /**
   * Main randomized driver: applies random set/clear/flip operations and range
   * operations to a (BitSet, LongBitSet) pair kept in lock-step, then verifies
   * get/nextSetBit/prevSetBit/cardinality and the binary ops against the
   * previous iteration's pair.
   *
   * @param maxSize upper bound on the bit-set size per iteration
   * @param iter    number of random iterations
   * @param mode    NOTE(review): currently unused here (TestFixedBitSet uses it
   *                to select an iteration method) — confirm before removing
   */
  void doRandomSets(int maxSize, int iter, int mode) throws IOException {
    BitSet a0=null;
    LongBitSet b0=null;

    for (int i=0; i<iter; i++) {
      int sz = _TestUtil.nextInt(random(), 2, maxSize);
      BitSet a = new BitSet(sz);
      LongBitSet b = new LongBitSet(sz);

      // test the various ways of setting bits
      if (sz>0) {
        int nOper = random().nextInt(sz);
        for (int j=0; j<nOper; j++) {
          int idx;

          idx = random().nextInt(sz);
          a.set(idx);
          b.set(idx);

          idx = random().nextInt(sz);
          a.clear(idx);
          b.clear(idx);

          idx = random().nextInt(sz);
          a.flip(idx);
          // LongBitSet has no single-bit flip; use the range form
          b.flip(idx, idx+1);

          idx = random().nextInt(sz);
          a.flip(idx);
          b.flip(idx, idx+1);

          // getAndSet must report the prior value and leave the bit set
          boolean val2 = b.get(idx);
          boolean val = b.getAndSet(idx);
          assertTrue(val2 == val);
          assertTrue(b.get(idx));

          // restore the original value so a and b stay in sync
          if (!val) b.clear(idx);
          assertTrue(b.get(idx) == val);
        }
      }

      // test that the various ways of accessing the bits are equivalent
      doGet(a,b);

      // test ranges, including possible extension
      int fromIndex, toIndex;
      fromIndex = random().nextInt(sz/2);
      toIndex = fromIndex + random().nextInt(sz - fromIndex);
      BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
      LongBitSet bb = b.clone(); bb.flip(fromIndex,toIndex);

      fromIndex = random().nextInt(sz/2);
      toIndex = fromIndex + random().nextInt(sz - fromIndex);
      aa = (BitSet)a.clone(); aa.clear(fromIndex,toIndex);
      bb = b.clone(); bb.clear(fromIndex,toIndex);

      doNextSetBit(aa,bb); // a problem here is from clear() or nextSetBit

      doPrevSetBit(aa,bb);

      fromIndex = random().nextInt(sz/2);
      toIndex = fromIndex + random().nextInt(sz - fromIndex);
      aa = (BitSet)a.clone(); aa.set(fromIndex,toIndex);
      bb = b.clone(); bb.set(fromIndex,toIndex);

      doNextSetBit(aa,bb); // a problem here is from set() or nextSetBit

      doPrevSetBit(aa,bb);

      // binary operations against the previous iteration's pair; only valid
      // when the previous LongBitSet is not longer than the current one
      if (b0 != null && b0.length() <= b.length()) {
        assertEquals(a.cardinality(), b.cardinality());

        BitSet a_and = (BitSet)a.clone(); a_and.and(a0);
        BitSet a_or = (BitSet)a.clone(); a_or.or(a0);
        BitSet a_xor = (BitSet)a.clone(); a_xor.xor(a0);
        BitSet a_andn = (BitSet)a.clone(); a_andn.andNot(a0);

        LongBitSet b_and = b.clone(); assertEquals(b,b_and); b_and.and(b0);
        LongBitSet b_or = b.clone(); b_or.or(b0);
        LongBitSet b_xor = b.clone(); b_xor.xor(b0);
        LongBitSet b_andn = b.clone(); b_andn.andNot(b0);

        assertEquals(a0.cardinality(), b0.cardinality());
        assertEquals(a_or.cardinality(), b_or.cardinality());

        assertEquals(a_and.cardinality(), b_and.cardinality());
        assertEquals(a_or.cardinality(), b_or.cardinality());
        assertEquals(a_xor.cardinality(), b_xor.cardinality());
        assertEquals(a_andn.cardinality(), b_andn.cardinality());
      }

      a0=a;
      b0=b;
    }
  }

  // large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
  // larger testsuite.
  public void testSmall() throws IOException {
    doRandomSets(atLeast(1200), atLeast(1000), 1);
    doRandomSets(atLeast(1200), atLeast(1000), 2);
  }

  // uncomment to run a bigger test (~2 minutes).
  /*
  public void testBig() {
    doRandomSets(2000,200000, 1);
    doRandomSets(2000,200000, 2);
  }
  */

  /** Verifies equals() symmetry and that set/unset divergence is detected. */
  public void testEquals() {
    // This test can't handle numBits==0:
    final int numBits = random().nextInt(2000) + 1;
    LongBitSet b1 = new LongBitSet(numBits);
    LongBitSet b2 = new LongBitSet(numBits);
    assertTrue(b1.equals(b2));
    assertTrue(b2.equals(b1));
    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
      int idx = random().nextInt(numBits);
      if (!b1.get(idx)) {
        b1.set(idx);
        assertFalse(b1.equals(b2));
        assertFalse(b2.equals(b1));
        b2.set(idx);
        assertTrue(b1.equals(b2));
        assertTrue(b2.equals(b1));
      }
    }

    // try different type of object
    assertFalse(b1.equals(new Object()));
  }

  /** Verifies the equals()/hashCode() contract: equal sets hash equal, diverged sets differ. */
  public void testHashCodeEquals() {
    // This test can't handle numBits==0:
    final int numBits = random().nextInt(2000) + 1;
    LongBitSet b1 = new LongBitSet(numBits);
    LongBitSet b2 = new LongBitSet(numBits);
    assertTrue(b1.equals(b2));
    assertTrue(b2.equals(b1));
    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
      int idx = random().nextInt(numBits);
      if (!b1.get(idx)) {
        b1.set(idx);
        assertFalse(b1.equals(b2));
        assertFalse(b1.hashCode() == b2.hashCode());
        b2.set(idx);
        assertEquals(b1, b2);
        assertEquals(b1.hashCode(), b2.hashCode());
      }
    }
  }

  /** Exercises the degenerate sizes 0..9, including full set/flip round-trips. */
  public void testSmallBitSets() {
    // Make sure size 0-10 bit sets are OK:
    for(int numBits=0;numBits<10;numBits++) {
      LongBitSet b1 = new LongBitSet(numBits);
      LongBitSet b2 = new LongBitSet(numBits);
      assertTrue(b1.equals(b2));
      assertEquals(b1.hashCode(), b2.hashCode());
      assertEquals(0, b1.cardinality());
      if (numBits > 0) {
        b1.set(0, numBits);
        assertEquals(numBits, b1.cardinality());
        b1.flip(0, numBits);
        assertEquals(0, b1.cardinality());
      }
    }
  }

  /** Builds a LongBitSet with the given bits set; randomly uses the wrapping
   *  constructor with an oversized array whose trailing ("ghost") words are
   *  filled with random garbage, to check those words are ignored. */
  private LongBitSet makeLongFixedBitSet(int[] a, int numBits) {
    LongBitSet bs;
    if (random().nextBoolean()) {
      int bits2words = LongBitSet.bits2words(numBits);
      long[] words = new long[bits2words + random().nextInt(100)];
      for (int i = bits2words; i < words.length; i++) {
        words[i] = random().nextLong();
      }
      bs = new LongBitSet(words, numBits);

    } else {
      bs = new LongBitSet(numBits);
    }
    for (int e: a) {
      bs.set(e);
    }
    return bs;
  }

  /** Builds the java.util.BitSet mirror with the given bits set. */
  private BitSet makeBitSet(int[] a) {
    BitSet bs = new BitSet();
    for (int e: a) {
      bs.set(e);
    }
    return bs;
  }

  /** Cross-checks prevSetBit for the given explicit bit pattern. */
  private void checkPrevSetBitArray(int [] a, int numBits) {
    LongBitSet obs = makeLongFixedBitSet(a, numBits);
    BitSet bs = makeBitSet(a);
    doPrevSetBit(bs, obs);
  }

  public void testPrevSetBit() {
    checkPrevSetBitArray(new int[] {}, 0);
    checkPrevSetBitArray(new int[] {0}, 1);
    checkPrevSetBitArray(new int[] {0,2}, 3);
  }


  /** Cross-checks nextSetBit for the given explicit bit pattern. */
  private void checkNextSetBitArray(int [] a, int numBits) {
    LongBitSet obs = makeLongFixedBitSet(a, numBits);
    BitSet bs = makeBitSet(a);
    doNextSetBit(bs, obs);
  }

  public void testNextBitSet() {
    int[] setBits = new int[0+random().nextInt(1000)];
    for (int i = 0; i < setBits.length; i++) {
      setBits[i] = random().nextInt(setBits.length);
    }
    checkNextSetBitArray(setBits, setBits.length + random().nextInt(10));

    // also check the empty pattern
    checkNextSetBitArray(new int[0], setBits.length + random().nextInt(10));
  }

  /** Verifies ensureCapacity: growth reallocates (no sharing), while a smaller
   *  request reuses the existing instance. */
  public void testEnsureCapacity() {
    LongBitSet bits = new LongBitSet(5);
    bits.set(1);
    bits.set(4);

    LongBitSet newBits = LongBitSet.ensureCapacity(bits, 8); // grow within the word
    assertTrue(newBits.get(1));
    assertTrue(newBits.get(4));
    newBits.clear(1);
    // we align to 64-bits, so even though it shouldn't have, it re-allocated a long[1]
    assertTrue(bits.get(1));
    assertFalse(newBits.get(1));

    newBits.set(1);
    newBits = LongBitSet.ensureCapacity(newBits, newBits.length() - 2); // reuse
    assertTrue(newBits.get(1));

    bits.set(1);
    newBits = LongBitSet.ensureCapacity(bits, 72); // grow beyond one word
    assertTrue(newBits.get(1));
    assertTrue(newBits.get(4));
    newBits.clear(1);
    // we grew the long[], so it's not shared
    assertTrue(bits.get(1));
    assertFalse(newBits.get(1));
  }

}
|
|
@ -221,7 +221,7 @@ public class TestNumericUtils extends LuceneTestCase {
|
|||
final boolean useBitSet, final Iterable<Long> expectedBounds, final Iterable<Integer> expectedShifts
|
||||
) {
|
||||
// Cannot use FixedBitSet since the range could be long:
|
||||
final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
|
||||
final LongBitSet bits=useBitSet ? new LongBitSet(upper-lower+1) : null;
|
||||
final Iterator<Long> neededBounds = (expectedBounds == null) ? null : expectedBounds.iterator();
|
||||
final Iterator<Integer> neededShifts = (expectedShifts == null) ? null : expectedShifts.iterator();
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.queries;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.BitsFilteredDocIdSet;
|
||||
|
@ -24,10 +26,7 @@ import org.apache.lucene.search.DocIdSet;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.OpenBitSetDISI;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
|
@ -127,23 +126,17 @@ public class ChainedFilter extends Filter {
|
|||
}
|
||||
}
|
||||
|
||||
private OpenBitSetDISI initialResult(AtomicReaderContext context, int logic, int[] index)
|
||||
private FixedBitSet initialResult(AtomicReaderContext context, int logic, int[] index)
|
||||
throws IOException {
|
||||
AtomicReader reader = context.reader();
|
||||
OpenBitSetDISI result;
|
||||
/**
|
||||
* First AND operation takes place against a completely false
|
||||
* bitset and will always return zero results.
|
||||
*/
|
||||
FixedBitSet result = new FixedBitSet(reader.maxDoc());
|
||||
if (logic == AND) {
|
||||
result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc());
|
||||
result.or(getDISI(chain[index[0]], context));
|
||||
++index[0];
|
||||
} else if (logic == ANDNOT) {
|
||||
result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc());
|
||||
result.or(getDISI(chain[index[0]], context));
|
||||
result.flip(0, reader.maxDoc()); // NOTE: may set bits for deleted docs.
|
||||
++index[0];
|
||||
} else {
|
||||
result = new OpenBitSetDISI(reader.maxDoc());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -157,7 +150,7 @@ public class ChainedFilter extends Filter {
|
|||
*/
|
||||
private DocIdSet getDocIdSet(AtomicReaderContext context, int logic, int[] index)
|
||||
throws IOException {
|
||||
OpenBitSetDISI result = initialResult(context, logic, index);
|
||||
FixedBitSet result = initialResult(context, logic, index);
|
||||
for (; index[0] < chain.length; index[0]++) {
|
||||
// we dont pass acceptDocs, we will filter at the end using an additional filter
|
||||
doChain(result, logic, chain[index[0]].getDocIdSet(context, null));
|
||||
|
@ -178,7 +171,7 @@ public class ChainedFilter extends Filter {
|
|||
throw new IllegalArgumentException("Invalid number of elements in logic array");
|
||||
}
|
||||
|
||||
OpenBitSetDISI result = initialResult(context, logic[0], index);
|
||||
FixedBitSet result = initialResult(context, logic[0], index);
|
||||
for (; index[0] < chain.length; index[0]++) {
|
||||
// we dont pass acceptDocs, we will filter at the end using an additional filter
|
||||
doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context, null));
|
||||
|
@ -198,23 +191,21 @@ public class ChainedFilter extends Filter {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
|
||||
throws IOException {
|
||||
|
||||
if (dis instanceof OpenBitSet) {
|
||||
// optimized case for OpenBitSets
|
||||
private void doChain(FixedBitSet result, int logic, DocIdSet dis) throws IOException {
|
||||
if (dis instanceof FixedBitSet) {
|
||||
// optimized case for FixedBitSets
|
||||
switch (logic) {
|
||||
case OR:
|
||||
result.or((OpenBitSet) dis);
|
||||
result.or((FixedBitSet) dis);
|
||||
break;
|
||||
case AND:
|
||||
result.and((OpenBitSet) dis);
|
||||
result.and((FixedBitSet) dis);
|
||||
break;
|
||||
case ANDNOT:
|
||||
result.andNot((OpenBitSet) dis);
|
||||
result.andNot((FixedBitSet) dis);
|
||||
break;
|
||||
case XOR:
|
||||
result.xor((OpenBitSet) dis);
|
||||
result.xor((FixedBitSet) dis);
|
||||
break;
|
||||
default:
|
||||
doChain(result, DEFAULT, dis);
|
||||
|
@ -233,16 +224,16 @@ public class ChainedFilter extends Filter {
|
|||
|
||||
switch (logic) {
|
||||
case OR:
|
||||
result.inPlaceOr(disi);
|
||||
result.or(disi);
|
||||
break;
|
||||
case AND:
|
||||
result.inPlaceAnd(disi);
|
||||
result.and(disi);
|
||||
break;
|
||||
case ANDNOT:
|
||||
result.inPlaceNot(disi);
|
||||
result.andNot(disi);
|
||||
break;
|
||||
case XOR:
|
||||
result.inPlaceXor(disi);
|
||||
result.xor(disi);
|
||||
break;
|
||||
default:
|
||||
doChain(result, DEFAULT, dis);
|
||||
|
|
|
@ -83,7 +83,7 @@ public abstract class AbstractVisitingPrefixTreeFilter extends AbstractPrefixTre
|
|||
* The {@link #getDocIdSet()} method here starts the work. It first checks
|
||||
* that there are indexed terms; if not it quickly returns null. Then it calls
|
||||
* {@link #start()} so a subclass can set up a return value, like an
|
||||
* {@link org.apache.lucene.util.OpenBitSet}. Then it starts the traversal
|
||||
* {@link org.apache.lucene.util.FixedBitSet}. Then it starts the traversal
|
||||
* process, calling {@link #findSubCellsToVisit(org.apache.lucene.spatial.prefix.tree.Cell)}
|
||||
* which by default finds the top cells that intersect {@code queryShape}. If
|
||||
* there isn't an indexed cell for a corresponding cell returned for this
|
||||
|
|
|
@ -36,8 +36,7 @@ import org.apache.lucene.index.SortedSetDocValues;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
|
||||
/**
|
||||
* Just like {@link Lucene45DocValuesFormat} but with additional asserts.
|
||||
|
@ -147,7 +146,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
|
|||
|
||||
int docCount = 0;
|
||||
long ordCount = 0;
|
||||
OpenBitSet seenOrds = new OpenBitSet(valueCount);
|
||||
LongBitSet seenOrds = new LongBitSet(valueCount);
|
||||
Iterator<Number> ordIterator = ords.iterator();
|
||||
for (Number v : docToOrdCount) {
|
||||
assert v != null;
|
||||
|
|
Loading…
Reference in New Issue