mirror of https://github.com/apache/lucene.git
LUCENE-3280: add FixedBitSet and cutover most of Lucene
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145239 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fe93de7319
commit
a17df303e9
|
@ -49,7 +49,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public class HighlighterPhraseTest extends LuceneTestCase {
|
||||
private static final String FIELD = "text";
|
||||
|
@ -119,7 +119,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
|
|||
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
|
||||
new SpanTermQuery(new Term(FIELD, "fox")),
|
||||
new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
|
||||
final OpenBitSet bitset = new OpenBitSet();
|
||||
final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
|
||||
indexSearcher.search(phraseQuery, new Collector() {
|
||||
private int baseDoc;
|
||||
|
||||
|
@ -146,10 +146,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
|
|||
}
|
||||
});
|
||||
assertEquals(1, bitset.cardinality());
|
||||
final int maxDoc = indexReader.maxDoc();
|
||||
final Highlighter highlighter = new Highlighter(
|
||||
new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
|
||||
new QueryScorer(phraseQuery));
|
||||
for (int position = bitset.nextSetBit(0); position >= 0; position = bitset
|
||||
for (int position = bitset.nextSetBit(0); position >= 0 && position < maxDoc-1; position = bitset
|
||||
.nextSetBit(position + 1)) {
|
||||
assertEquals(0, position);
|
||||
final TokenStream tokenStream = TokenSources.getTokenStream(
|
||||
|
|
|
@ -25,8 +25,8 @@ import org.apache.lucene.index.IndexWriter; // javadoc
|
|||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
|
@ -178,7 +178,7 @@ public class MultiPassIndexSplitter {
|
|||
* list of deletions.
|
||||
*/
|
||||
public static final class FakeDeleteIndexReader extends FilterIndexReader {
|
||||
OpenBitSet liveDocs;
|
||||
FixedBitSet liveDocs;
|
||||
|
||||
public FakeDeleteIndexReader(IndexReader in) {
|
||||
super(new SlowMultiReaderWrapper(in));
|
||||
|
@ -197,13 +197,13 @@ public class MultiPassIndexSplitter {
|
|||
@Override
|
||||
protected void doUndeleteAll() {
|
||||
final int maxDoc = in.maxDoc();
|
||||
liveDocs = new OpenBitSet(maxDoc);
|
||||
liveDocs = new FixedBitSet(in.maxDoc());
|
||||
if (in.hasDeletions()) {
|
||||
final Bits oldLiveDocs = in.getLiveDocs();
|
||||
assert oldLiveDocs != null;
|
||||
// this loop is a little bit inefficient, as Bits has no nextSetBit():
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
if (oldLiveDocs.get(i)) liveDocs.fastSet(i);
|
||||
if (oldLiveDocs.get(i)) liveDocs.set(i);
|
||||
}
|
||||
} else {
|
||||
// mark all docs as valid
|
||||
|
|
|
@ -27,8 +27,8 @@ import org.apache.lucene.search.Filter;
|
|||
import org.apache.lucene.search.TermRangeFilter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.OpenBitSetDISI;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
|
@ -94,12 +94,12 @@ public class PKIndexSplitter {
|
|||
super(new SlowMultiReaderWrapper(reader));
|
||||
|
||||
final int maxDoc = in.maxDoc();
|
||||
final OpenBitSetDISI bits = new OpenBitSetDISI(maxDoc);
|
||||
final FixedBitSet bits = new FixedBitSet(maxDoc);
|
||||
final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext());
|
||||
if (docs != null) {
|
||||
final DocIdSetIterator it = docs.iterator();
|
||||
if (it != null) {
|
||||
bits.inPlaceOr(it);
|
||||
bits.or(it);
|
||||
}
|
||||
}
|
||||
if (negateFilter) {
|
||||
|
@ -113,7 +113,7 @@ public class PKIndexSplitter {
|
|||
for (int i = it.nextDoc(); i < maxDoc; i = it.nextDoc()) {
|
||||
if (!oldLiveDocs.get(i)) {
|
||||
// we can safely modify the current bit, as the iterator already stepped over it:
|
||||
bits.fastClear(i);
|
||||
bits.clear(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.index.Terms;
|
|||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
public class DuplicateFilter extends Filter
|
||||
|
@ -84,8 +84,8 @@ public class DuplicateFilter extends Filter
|
|||
}
|
||||
}
|
||||
|
||||
private OpenBitSet correctBits(IndexReader reader) throws IOException {
|
||||
OpenBitSet bits = new OpenBitSet(reader.maxDoc()); //assume all are INvalid
|
||||
private FixedBitSet correctBits(IndexReader reader) throws IOException {
|
||||
FixedBitSet bits = new FixedBitSet(reader.maxDoc()); //assume all are INvalid
|
||||
final Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
Terms terms = reader.fields().terms(fieldName);
|
||||
if (terms != null) {
|
||||
|
@ -119,10 +119,10 @@ public class DuplicateFilter extends Filter
|
|||
return bits;
|
||||
}
|
||||
|
||||
private OpenBitSet fastBits(IndexReader reader) throws IOException
|
||||
private FixedBitSet fastBits(IndexReader reader) throws IOException
|
||||
{
|
||||
|
||||
OpenBitSet bits=new OpenBitSet(reader.maxDoc());
|
||||
FixedBitSet bits=new FixedBitSet(reader.maxDoc());
|
||||
bits.set(0,reader.maxDoc()); //assume all are valid
|
||||
final Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
Terms terms = reader.fields().terms(fieldName);
|
||||
|
|
|
@ -111,6 +111,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
@Override
|
||||
public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
|
||||
final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader, query.field);
|
||||
// Cannot use FixedBitSet because we require long index (ord):
|
||||
final OpenBitSet termSet = new OpenBitSet(fcsi.numOrd());
|
||||
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
|
||||
|
||||
|
@ -142,7 +143,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
do {
|
||||
long ord = termsEnum.ord();
|
||||
if (ord > 0) {
|
||||
termSet.fastSet(ord);
|
||||
termSet.set(ord);
|
||||
termCount++;
|
||||
}
|
||||
} while (termsEnum.next() != null);
|
||||
|
@ -155,7 +156,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader, true) {
|
||||
@Override
|
||||
boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
|
||||
return termSet.fastGet(fcsi.getOrd(doc));
|
||||
return termSet.get(fcsi.getOrd(doc));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.index.DocsEnum;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
|
@ -60,7 +60,7 @@ public class TermsFilter extends Filter
|
|||
@Override
|
||||
public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
|
||||
IndexReader reader = context.reader;
|
||||
OpenBitSet result=new OpenBitSet(reader.maxDoc());
|
||||
FixedBitSet result=new FixedBitSet(reader.maxDoc());
|
||||
Fields fields = reader.fields();
|
||||
BytesRef br = new BytesRef();
|
||||
Bits liveDocs = reader.getLiveDocs();
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.index.SlowMultiReaderWrapper;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public class TermsFilterTest extends LuceneTestCase {
|
||||
|
||||
|
@ -67,19 +67,19 @@ public class TermsFilterTest extends LuceneTestCase {
|
|||
|
||||
TermsFilter tf=new TermsFilter();
|
||||
tf.addTerm(new Term(fieldName,"19"));
|
||||
OpenBitSet bits = (OpenBitSet)tf.getDocIdSet(context);
|
||||
FixedBitSet bits = (FixedBitSet)tf.getDocIdSet(context);
|
||||
assertEquals("Must match nothing", 0, bits.cardinality());
|
||||
|
||||
tf.addTerm(new Term(fieldName,"20"));
|
||||
bits = (OpenBitSet)tf.getDocIdSet(context);
|
||||
bits = (FixedBitSet)tf.getDocIdSet(context);
|
||||
assertEquals("Must match 1", 1, bits.cardinality());
|
||||
|
||||
tf.addTerm(new Term(fieldName,"10"));
|
||||
bits = (OpenBitSet)tf.getDocIdSet(context);
|
||||
bits = (FixedBitSet)tf.getDocIdSet(context);
|
||||
assertEquals("Must match 2", 2, bits.cardinality());
|
||||
|
||||
tf.addTerm(new Term(fieldName,"00"));
|
||||
bits = (OpenBitSet)tf.getDocIdSet(context);
|
||||
bits = (FixedBitSet)tf.getDocIdSet(context);
|
||||
assertEquals("Must match 2", 2, bits.cardinality());
|
||||
|
||||
reader.close();
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* <p><font color="red"><b>NOTE:</b> This API is still in
|
||||
|
@ -67,7 +67,7 @@ public class CartesianShapeFilter extends Filter {
|
|||
}
|
||||
};
|
||||
} else {
|
||||
final OpenBitSet bits = new OpenBitSet(context.reader.maxDoc());
|
||||
final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc());
|
||||
for (int i =0; i< sz; i++) {
|
||||
double boxId = area.get(i).doubleValue();
|
||||
NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef);
|
||||
|
@ -77,7 +77,7 @@ public class CartesianShapeFilter extends Filter {
|
|||
// which have this boxId
|
||||
int doc;
|
||||
while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
bits.fastSet(doc);
|
||||
bits.set(doc);
|
||||
}
|
||||
}
|
||||
return bits;
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.WeakHashMap;
|
|||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.util.OpenBitSetDISI;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/**
|
||||
|
@ -173,7 +173,7 @@ public class CachingWrapperFilter extends Filter {
|
|||
* by the wrapped Filter.
|
||||
* <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable}
|
||||
* returns <code>true</code>, else it copies the {@link DocIdSetIterator} into
|
||||
* an {@link OpenBitSetDISI}.
|
||||
* a {@link FixedBitSet}.
|
||||
*/
|
||||
protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException {
|
||||
if (docIdSet == null) {
|
||||
|
@ -186,7 +186,13 @@ public class CachingWrapperFilter extends Filter {
|
|||
// null is allowed to be returned by iterator(),
|
||||
// in this case we wrap with the empty set,
|
||||
// which is cacheable.
|
||||
return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc());
|
||||
if (it == null) {
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
} else {
|
||||
final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
|
||||
bits.or(it);
|
||||
return bits;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.DocsEnum; // javadoc @link
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -54,11 +54,11 @@ import org.apache.lucene.util.BytesRef;
|
|||
* <p/>
|
||||
*
|
||||
* With each search, this filter translates the specified
|
||||
* set of Terms into a private {@link OpenBitSet} keyed by
|
||||
* set of Terms into a private {@link FixedBitSet} keyed by
|
||||
* term number per unique {@link IndexReader} (normally one
|
||||
* reader per segment). Then, during matching, the term
|
||||
* number for each docID is retrieved from the cache and
|
||||
* then checked for inclusion using the {@link OpenBitSet}.
|
||||
* then checked for inclusion using the {@link FixedBitSet}.
|
||||
* Since all testing is done using RAM resident data
|
||||
* structures, performance should be very fast, most likely
|
||||
* fast enough to not require further caching of the
|
||||
|
@ -69,12 +69,12 @@ import org.apache.lucene.util.BytesRef;
|
|||
*
|
||||
* <p/>
|
||||
*
|
||||
* In contrast, TermsFilter builds up an {@link OpenBitSet},
|
||||
* In contrast, TermsFilter builds up an {@link FixedBitSet},
|
||||
* keyed by docID, every time it's created, by enumerating
|
||||
* through all matching docs using {@link DocsEnum} to seek
|
||||
* and scan through each term's docID list. While there is
|
||||
* no linear scan of all docIDs, besides the allocation of
|
||||
* the underlying array in the {@link OpenBitSet}, this
|
||||
* the underlying array in the {@link FixedBitSet}, this
|
||||
* approach requires a number of "disk seeks" in proportion
|
||||
* to the number of terms, which can be exceptionally costly
|
||||
* when there are cache misses in the OS's IO cache.
|
||||
|
@ -123,16 +123,16 @@ public class FieldCacheTermsFilter extends Filter {
|
|||
protected class FieldCacheTermsFilterDocIdSet extends DocIdSet {
|
||||
private FieldCache.DocTermsIndex fcsi;
|
||||
|
||||
private OpenBitSet openBitSet;
|
||||
private FixedBitSet bits;
|
||||
|
||||
public FieldCacheTermsFilterDocIdSet(FieldCache.DocTermsIndex fcsi) {
|
||||
this.fcsi = fcsi;
|
||||
openBitSet = new OpenBitSet(this.fcsi.numOrd());
|
||||
bits = new FixedBitSet(this.fcsi.numOrd());
|
||||
final BytesRef spare = new BytesRef();
|
||||
for (int i=0;i<terms.length;i++) {
|
||||
int termNumber = this.fcsi.binarySearchLookup(terms[i], spare);
|
||||
if (termNumber > 0) {
|
||||
openBitSet.fastSet(termNumber);
|
||||
bits.set(termNumber);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -159,7 +159,7 @@ public class FieldCacheTermsFilter extends Filter {
|
|||
@Override
|
||||
public int nextDoc() {
|
||||
try {
|
||||
while (!openBitSet.fastGet(fcsi.getOrd(++doc))) {}
|
||||
while (!bits.get(fcsi.getOrd(++doc))) {}
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
@ -170,7 +170,7 @@ public class FieldCacheTermsFilter extends Filter {
|
|||
public int advance(int target) {
|
||||
try {
|
||||
doc = target;
|
||||
while (!openBitSet.fastGet(fcsi.getOrd(doc))) {
|
||||
while (!bits.get(fcsi.getOrd(doc))) {
|
||||
doc++;
|
||||
}
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
|
|
|
@ -25,8 +25,8 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
/**
|
||||
* A wrapper for {@link MultiTermQuery}, that exposes its
|
||||
|
@ -122,8 +122,8 @@ public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filte
|
|||
final TermsEnum termsEnum = query.getTermsEnum(terms);
|
||||
assert termsEnum != null;
|
||||
if (termsEnum.next() != null) {
|
||||
// fill into a OpenBitSet
|
||||
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
|
||||
// fill into a FixedBitSet
|
||||
final FixedBitSet bitSet = new FixedBitSet(context.reader.maxDoc());
|
||||
int termCount = 0;
|
||||
final Bits liveDocs = reader.getLiveDocs();
|
||||
DocsEnum docsEnum = null;
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.search;
|
|||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -60,7 +60,7 @@ public class SpanQueryFilter extends SpanFilter {
|
|||
@Override
|
||||
public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException {
|
||||
|
||||
final OpenBitSet bits = new OpenBitSet(context.reader.maxDoc());
|
||||
final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc());
|
||||
Spans spans = query.getSpans(context);
|
||||
List<SpanFilterResult.PositionInfo> tmp = new ArrayList<SpanFilterResult.PositionInfo>(20);
|
||||
int currentDoc = -1;
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.search.FieldCache.Parser;
|
|||
import org.apache.lucene.search.cache.CachedArray.ByteValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public class ByteValuesCreator extends CachedArrayCreator<ByteValues>
|
||||
{
|
||||
|
@ -110,7 +110,7 @@ public class ByteValuesCreator extends CachedArrayCreator<ByteValues>
|
|||
vals.values = new byte[maxDoc];
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||
FixedBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new FixedBitSet( maxDoc ) : null;
|
||||
DocsEnum docs = null;
|
||||
try {
|
||||
while(true) {
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.search.FieldCache.Parser;
|
|||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public abstract class CachedArrayCreator<T extends CachedArray> extends EntryCreatorWithOptions<T>
|
||||
{
|
||||
|
@ -101,7 +101,7 @@ public abstract class CachedArrayCreator<T extends CachedArray> extends EntryCre
|
|||
/**
|
||||
* Utility function to help check what bits are valid
|
||||
*/
|
||||
protected Bits checkMatchAllBits( OpenBitSet valid, int numDocs, int maxDocs )
|
||||
protected Bits checkMatchAllBits( FixedBitSet valid, int numDocs, int maxDocs )
|
||||
{
|
||||
if( numDocs != maxDocs ) {
|
||||
if( hasOption( OPTION_CACHE_BITS ) ) {
|
||||
|
@ -124,7 +124,7 @@ public abstract class CachedArrayCreator<T extends CachedArray> extends EntryCre
|
|||
Terms terms = MultiFields.getTerms(reader, field);
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
OpenBitSet validBits = new OpenBitSet( reader.maxDoc() );
|
||||
FixedBitSet validBits = new FixedBitSet( reader.maxDoc() );
|
||||
DocsEnum docs = null;
|
||||
while(true) {
|
||||
final BytesRef term = termsEnum.next();
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.search.FieldCache.Parser;
|
|||
import org.apache.lucene.search.cache.CachedArray.DoubleValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public class DoubleValuesCreator extends CachedArrayCreator<DoubleValues>
|
||||
{
|
||||
|
@ -120,7 +120,7 @@ public class DoubleValuesCreator extends CachedArrayCreator<DoubleValues>
|
|||
vals.values = null;
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||
FixedBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new FixedBitSet( maxDoc ) : null;
|
||||
DocsEnum docs = null;
|
||||
try {
|
||||
while(true) {
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.search.FieldCache.Parser;
|
|||
import org.apache.lucene.search.cache.CachedArray.FloatValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public class FloatValuesCreator extends CachedArrayCreator<FloatValues>
|
||||
{
|
||||
|
@ -121,7 +121,7 @@ public class FloatValuesCreator extends CachedArrayCreator<FloatValues>
|
|||
vals.values = null;
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||
FixedBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new FixedBitSet( maxDoc ) : null;
|
||||
DocsEnum docs = null;
|
||||
try {
|
||||
while(true) {
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.search.FieldCache.Parser;
|
|||
import org.apache.lucene.search.cache.CachedArray.IntValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public class IntValuesCreator extends CachedArrayCreator<IntValues>
|
||||
{
|
||||
|
@ -121,7 +121,7 @@ public class IntValuesCreator extends CachedArrayCreator<IntValues>
|
|||
vals.values = null;
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||
FixedBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new FixedBitSet( maxDoc ) : null;
|
||||
DocsEnum docs = null;
|
||||
try {
|
||||
while(true) {
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.search.FieldCache.Parser;
|
|||
import org.apache.lucene.search.cache.CachedArray.LongValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public class LongValuesCreator extends CachedArrayCreator<LongValues>
|
||||
{
|
||||
|
@ -121,7 +121,7 @@ public class LongValuesCreator extends CachedArrayCreator<LongValues>
|
|||
vals.values = null;
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||
FixedBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new FixedBitSet( maxDoc ) : null;
|
||||
DocsEnum docs = null;
|
||||
try {
|
||||
while(true) {
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.search.FieldCache.ShortParser;
|
|||
import org.apache.lucene.search.cache.CachedArray.ShortValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
public class ShortValuesCreator extends CachedArrayCreator<ShortValues>
|
||||
{
|
||||
|
@ -111,7 +111,7 @@ public class ShortValuesCreator extends CachedArrayCreator<ShortValues>
|
|||
vals.values = new short[maxDoc];
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||
FixedBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new FixedBitSet( maxDoc ) : null;
|
||||
DocsEnum docs = null;
|
||||
try {
|
||||
while(true) {
|
||||
|
|
|
@ -0,0 +1,336 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
// TODO: maybe merge with BitVector? Problem is BitVector
|
||||
// caches its cardinality...
|
||||
|
||||
/** BitSet of fixed length (numBits), backed by accessible
|
||||
* ({@link #getBits}) long[], accessed with an int index,
|
||||
* implementing Bits and DocIdSet. Unlike {@link
|
||||
* OpenBitSet} this bit set does not auto-expand, cannot
|
||||
* handle long index, and does not have fastXX/XX variants
|
||||
* (just X).
|
||||
*
|
||||
* @lucene.internal
|
||||
**/
|
||||
|
||||
public final class FixedBitSet extends DocIdSet implements Bits {
|
||||
private final long[] bits;
|
||||
private int numBits;
|
||||
|
||||
/** returns the number of 64 bit words it would take to hold numBits */
|
||||
public static int bits2words(int numBits) {
|
||||
int numLong = numBits >>> 6;
|
||||
if ((numBits & 63) != 0) {
|
||||
numLong++;
|
||||
}
|
||||
return numLong;
|
||||
}
|
||||
|
||||
public FixedBitSet(int numBits) {
|
||||
this.numBits = numBits;
|
||||
bits = new long[bits2words(numBits)];
|
||||
}
|
||||
|
||||
/**
 * Copy constructor: the new set receives its own backing array holding
 * the same words as {@code other}, and the same {@code numBits}.
 *
 * @param other the set to duplicate
 */
public FixedBitSet(FixedBitSet other) {
  this.numBits = other.numBits;
  this.bits = other.bits.clone();
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return new OpenBitSetIterator(bits, bits.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return numBits;
|
||||
}
|
||||
|
||||
/** This DocIdSet implementation is cacheable. */
|
||||
@Override
|
||||
public boolean isCacheable() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Expert. */
|
||||
public long[] getBits() {
|
||||
return bits;
|
||||
}
|
||||
|
||||
/** Returns number of set bits. NOTE: this visits every
|
||||
* long in the backing bits array, and the result is not
|
||||
* internally cached! */
|
||||
public int cardinality() {
|
||||
return (int) BitUtil.pop_array(bits, 0, bits.length);
|
||||
}
|
||||
|
||||
public boolean get(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int i = index >> 6; // div 64
|
||||
// signed shift will keep a negative index and force an
|
||||
// array-index-out-of-bounds-exception, removing the need for an explicit check.
|
||||
int bit = index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
return (bits[i] & bitmask) != 0;
|
||||
}
|
||||
|
||||
public void set(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6; // div 64
|
||||
int bit = index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
bits[wordNum] |= bitmask;
|
||||
}
|
||||
|
||||
public boolean getAndSet(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6; // div 64
|
||||
int bit = index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
boolean val = (bits[wordNum] & bitmask) != 0;
|
||||
bits[wordNum] |= bitmask;
|
||||
return val;
|
||||
}
|
||||
|
||||
public void clear(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6;
|
||||
int bit = index & 0x03f;
|
||||
long bitmask = 1L << bit;
|
||||
bits[wordNum] &= ~bitmask;
|
||||
}
|
||||
|
||||
public boolean getAndClear(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6; // div 64
|
||||
int bit = index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
boolean val = (bits[wordNum] & bitmask) != 0;
|
||||
bits[wordNum] &= ~bitmask;
|
||||
return val;
|
||||
}
|
||||
|
||||
/** Returns the index of the first set bit starting at the index specified.
|
||||
* -1 is returned if there are no more set bits.
|
||||
*/
|
||||
public int nextSetBit(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int i = index >> 6;
|
||||
int subIndex = index & 0x3f; // index within the word
|
||||
long word = bits[i] >> subIndex; // skip all the bits to the right of index
|
||||
|
||||
if (word!=0) {
|
||||
return (i<<6) + subIndex + BitUtil.ntz(word);
|
||||
}
|
||||
|
||||
while(++i < bits.length) {
|
||||
word = bits[i];
|
||||
if (word != 0) {
|
||||
return (i<<6) + BitUtil.ntz(word);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
public int prevSetBit(int index) {
|
||||
assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits;
|
||||
int i = index >> 6;
|
||||
final int subIndex;
|
||||
long word;
|
||||
subIndex = index & 0x3f; // index within the word
|
||||
word = (bits[i] << (63-subIndex)); // skip all the bits to the left of index
|
||||
|
||||
if (word != 0) {
|
||||
return (i << 6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197
|
||||
}
|
||||
|
||||
while (--i >= 0) {
|
||||
word = bits[i];
|
||||
if (word !=0 ) {
|
||||
return (i << 6) + 63 - Long.numberOfLeadingZeros(word);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/** Does in-place OR of the bits provided by the
|
||||
* iterator. */
|
||||
public void or(DocIdSetIterator iter) throws IOException {
|
||||
int doc;
|
||||
while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
set(doc);
|
||||
}
|
||||
}
|
||||
|
||||
public void or(FixedBitSet other) {
|
||||
long[] thisArr = this.bits;
|
||||
long[] otherArr = other.bits;
|
||||
int pos = Math.min(thisArr.length, otherArr.length);
|
||||
while (--pos >= 0) {
|
||||
thisArr[pos] |= otherArr[pos];
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: no .isEmpty() here because that's trappy (ie,
|
||||
// typically isEmpty is low cost, but this one wouldn't
|
||||
// be)
|
||||
|
||||
/** Flips a range of bits
|
||||
*
|
||||
* @param startIndex lower index
|
||||
* @param endIndex one-past the last bit to flip
|
||||
*/
|
||||
public void flip(int startIndex, int endIndex) {
|
||||
assert startIndex >= 0 && startIndex < numBits;
|
||||
assert endIndex >= 0 && endIndex <= numBits;
|
||||
if (endIndex <= startIndex) {
|
||||
return;
|
||||
}
|
||||
|
||||
int startWord = startIndex >> 6;
|
||||
int endWord = (endIndex-1) >> 6;
|
||||
|
||||
/*** Grrr, java shifting wraps around so -1L>>>64 == -1
|
||||
* for that reason, make sure not to use endmask if the bits to flip will
|
||||
* be zero in the last word (redefine endWord to be the last changed...)
|
||||
long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
|
||||
long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
|
||||
***/
|
||||
|
||||
long startmask = -1L << startIndex;
|
||||
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
|
||||
|
||||
if (startWord == endWord) {
|
||||
bits[startWord] ^= (startmask & endmask);
|
||||
return;
|
||||
}
|
||||
|
||||
bits[startWord] ^= startmask;
|
||||
|
||||
for (int i=startWord+1; i<endWord; i++) {
|
||||
bits[i] = ~bits[i];
|
||||
}
|
||||
|
||||
bits[endWord] ^= endmask;
|
||||
}
|
||||
|
||||
/** Sets a range of bits
|
||||
*
|
||||
* @param startIndex lower index
|
||||
* @param endIndex one-past the last bit to set
|
||||
*/
|
||||
public void set(int startIndex, int endIndex) {
|
||||
assert startIndex >= 0 && startIndex < numBits;
|
||||
assert endIndex >= 0 && endIndex <= numBits;
|
||||
if (endIndex <= startIndex) {
|
||||
return;
|
||||
}
|
||||
|
||||
int startWord = startIndex >> 6;
|
||||
int endWord = (endIndex-1) >> 6;
|
||||
|
||||
long startmask = -1L << startIndex;
|
||||
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
|
||||
|
||||
if (startWord == endWord) {
|
||||
bits[startWord] |= (startmask & endmask);
|
||||
return;
|
||||
}
|
||||
|
||||
bits[startWord] |= startmask;
|
||||
Arrays.fill(bits, startWord+1, endWord, -1L);
|
||||
bits[endWord] |= endmask;
|
||||
}
|
||||
|
||||
/** Clears a range of bits.
|
||||
*
|
||||
* @param startIndex lower index
|
||||
* @param endIndex one-past the last bit to clear
|
||||
*/
|
||||
public void clear(int startIndex, int endIndex) {
|
||||
assert startIndex >= 0 && startIndex < numBits;
|
||||
assert endIndex >= 0 && endIndex <= numBits;
|
||||
if (endIndex <= startIndex) {
|
||||
return;
|
||||
}
|
||||
|
||||
int startWord = startIndex >> 6;
|
||||
int endWord = (endIndex-1) >> 6;
|
||||
|
||||
long startmask = -1L << startIndex;
|
||||
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
|
||||
|
||||
// invert masks since we are clearing
|
||||
startmask = ~startmask;
|
||||
endmask = ~endmask;
|
||||
|
||||
if (startWord == endWord) {
|
||||
bits[startWord] &= (startmask | endmask);
|
||||
return;
|
||||
}
|
||||
|
||||
bits[startWord] &= startmask;
|
||||
Arrays.fill(bits, startWord+1, endWord, 0L);
|
||||
bits[endWord] &= endmask;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object clone() {
|
||||
return new FixedBitSet(this);
|
||||
}
|
||||
|
||||
/** returns true if both sets have the same bits set */
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (!(o instanceof FixedBitSet)) {
|
||||
return false;
|
||||
}
|
||||
FixedBitSet other = (FixedBitSet) o;
|
||||
if (numBits != other.length()) {
|
||||
return false;
|
||||
}
|
||||
return Arrays.equals(bits, other.bits);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
long h = 0;
|
||||
for (int i = bits.length; --i>=0;) {
|
||||
h ^= bits[i];
|
||||
h = (h << 1) | (h >>> 63); // rotate left
|
||||
}
|
||||
// fold leftmost bits into right and add a constant to prevent
|
||||
// empty sets from returning 0, which is too common.
|
||||
return (int) ((h>>32) ^ h) + 0x98761234;
|
||||
}
|
||||
}
|
|
@ -85,6 +85,7 @@ public class SortedVIntList extends DocIdSet {
|
|||
* Create a SortedVIntList from an OpenBitSet.
|
||||
* @param bits A bit set representing a set of integers.
|
||||
*/
|
||||
/*
|
||||
public SortedVIntList(OpenBitSet bits) {
|
||||
SortedVIntListBuilder builder = new SortedVIntListBuilder();
|
||||
int nextInt = bits.nextSetBit(0);
|
||||
|
@ -94,6 +95,7 @@ public class SortedVIntList extends DocIdSet {
|
|||
}
|
||||
builder.done();
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Create a SortedVIntList.
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.index.codecs.CodecProvider;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestLongPostings extends LuceneTestCase {
|
||||
|
@ -91,7 +91,7 @@ public class TestLongPostings extends LuceneTestCase {
|
|||
*/
|
||||
}
|
||||
|
||||
final OpenBitSet isS1 = new OpenBitSet(NUM_DOCS);
|
||||
final FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
|
||||
for(int idx=0;idx<NUM_DOCS;idx++) {
|
||||
if (random.nextBoolean()) {
|
||||
isS1.set(idx);
|
||||
|
|
|
@ -49,7 +49,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.FloatsRef;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.junit.Before;
|
||||
|
||||
|
@ -233,11 +233,11 @@ public class TestDocValuesIndexing extends LuceneTestCase {
|
|||
// run in random order to test if fill works correctly during merges
|
||||
Collections.shuffle(numVariantList, random);
|
||||
for (ValueType val : numVariantList) {
|
||||
OpenBitSet deleted = indexValues(w, numValues, val, numVariantList,
|
||||
FixedBitSet deleted = indexValues(w, numValues, val, numVariantList,
|
||||
withDeletions, 7);
|
||||
List<Closeable> closeables = new ArrayList<Closeable>();
|
||||
IndexReader r = IndexReader.open(w, true);
|
||||
final int numRemainingValues = (int) (numValues - deleted.cardinality());
|
||||
final int numRemainingValues = numValues - deleted.cardinality();
|
||||
final int base = r.numDocs() - numRemainingValues;
|
||||
// for FIXED_INTS_8 we use value mod 128 - to enable testing in
|
||||
// one go we simply use numValues as the mod for all other INT types
|
||||
|
@ -331,11 +331,11 @@ public class TestDocValuesIndexing extends LuceneTestCase {
|
|||
for (ValueType byteIndexValue : byteVariantList) {
|
||||
List<Closeable> closeables = new ArrayList<Closeable>();
|
||||
final int bytesSize = 1 + atLeast(50);
|
||||
OpenBitSet deleted = indexValues(w, numValues, byteIndexValue,
|
||||
FixedBitSet deleted = indexValues(w, numValues, byteIndexValue,
|
||||
byteVariantList, withDeletions, bytesSize);
|
||||
final IndexReader r = IndexReader.open(w, withDeletions);
|
||||
assertEquals(0, r.numDeletedDocs());
|
||||
final int numRemainingValues = (int) (numValues - deleted.cardinality());
|
||||
final int numRemainingValues = numValues - deleted.cardinality();
|
||||
final int base = r.numDocs() - numRemainingValues;
|
||||
IndexDocValues bytesReader = getDocValues(r, byteIndexValue.name());
|
||||
assertNotNull("field " + byteIndexValue.name()
|
||||
|
@ -484,11 +484,11 @@ public class TestDocValuesIndexing extends LuceneTestCase {
|
|||
Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
|
||||
Index.NO };
|
||||
|
||||
private OpenBitSet indexValues(IndexWriter w, int numValues, ValueType value,
|
||||
private FixedBitSet indexValues(IndexWriter w, int numValues, ValueType value,
|
||||
List<ValueType> valueVarList, boolean withDeletions, int bytesSize)
|
||||
throws CorruptIndexException, IOException {
|
||||
final boolean isNumeric = NUMERICS.contains(value);
|
||||
OpenBitSet deleted = new OpenBitSet(numValues);
|
||||
FixedBitSet deleted = new FixedBitSet(numValues);
|
||||
Document doc = new Document();
|
||||
Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)];
|
||||
AbstractField field = random.nextBoolean() ? new IndexDocValuesField(value.name())
|
||||
|
|
|
@ -30,8 +30,7 @@ import org.apache.lucene.index.SlowMultiReaderWrapper;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.OpenBitSetDISI;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestCachingWrapperFilter extends LuceneTestCase {
|
||||
|
@ -125,7 +124,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
|
|||
if (originalSet.isCacheable()) {
|
||||
assertEquals("Cached DocIdSet must be of same class like uncached, if cacheable", originalSet.getClass(), cachedSet.getClass());
|
||||
} else {
|
||||
assertTrue("Cached DocIdSet must be an OpenBitSet if the original one was not cacheable", cachedSet instanceof OpenBitSetDISI || cachedSet == DocIdSet.EMPTY_DOCIDSET);
|
||||
assertTrue("Cached DocIdSet must be an FixedBitSet if the original one was not cacheable", cachedSet instanceof FixedBitSet || cachedSet == DocIdSet.EMPTY_DOCIDSET);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -143,11 +142,11 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
|
|||
assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", Integer.valueOf(10000), Integer.valueOf(-10000), true, true), true);
|
||||
// is cacheable:
|
||||
assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), true);
|
||||
// a openbitset filter is always cacheable
|
||||
// a fixedbitset filter is always cacheable
|
||||
assertDocIdSetCacheable(reader, new Filter() {
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(AtomicReaderContext context) {
|
||||
return new OpenBitSet();
|
||||
return new FixedBitSet(context.reader.maxDoc());
|
||||
}
|
||||
}, true);
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
|
||||
|
||||
|
@ -97,7 +97,7 @@ public class TestFilteredSearch extends LuceneTestCase {
|
|||
@Override
|
||||
public DocIdSet getDocIdSet(AtomicReaderContext context) {
|
||||
assert context.isAtomic;
|
||||
final OpenBitSet set = new OpenBitSet();
|
||||
final FixedBitSet set = new FixedBitSet(context.reader.maxDoc());
|
||||
int docBase = context.docBase;
|
||||
final int limit = docBase+context.reader.maxDoc();
|
||||
for (;index < docs.length; index++) {
|
||||
|
@ -108,7 +108,7 @@ public class TestFilteredSearch extends LuceneTestCase {
|
|||
set.set(docId-docBase);
|
||||
}
|
||||
}
|
||||
return set.isEmpty()?null:set;
|
||||
return set.cardinality() == 0 ? null:set;
|
||||
}
|
||||
|
||||
public void reset(){
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.search.FieldCache.*;
|
|||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.*;
|
||||
|
||||
|
@ -220,7 +220,7 @@ public class TestEntryCreators extends LuceneTestCase {
|
|||
}
|
||||
assertEquals( "Cached numTerms does not match : "+tester, distinctTerms.size(), cachedVals.numTerms );
|
||||
assertEquals( "Cached numDocs does not match : "+tester, numDocs, cachedVals.numDocs );
|
||||
assertEquals( "Ordinal should match numDocs : "+tester, numDocs, ((OpenBitSet)cachedVals.valid).cardinality() );
|
||||
assertEquals( "Ordinal should match numDocs : "+tester, numDocs, ((FixedBitSet)cachedVals.valid).cardinality() );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,271 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
public class TestFixedBitSet extends LuceneTestCase {
|
||||
|
||||
void doGet(BitSet a, FixedBitSet b) {
|
||||
int max = b.length();
|
||||
for (int i=0; i<max; i++) {
|
||||
if (a.get(i) != b.get(i)) {
|
||||
fail("mismatch: BitSet=["+i+"]="+a.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void doNextSetBit(BitSet a, FixedBitSet b) {
|
||||
int aa=-1,bb=-1;
|
||||
do {
|
||||
aa = a.nextSetBit(aa+1);
|
||||
bb = bb < b.length()-1 ? b.nextSetBit(bb+1) : -1;
|
||||
assertEquals(aa,bb);
|
||||
} while (aa>=0);
|
||||
}
|
||||
|
||||
void doPrevSetBit(BitSet a, FixedBitSet b) {
|
||||
int aa = a.size() + random.nextInt(100);
|
||||
int bb = aa;
|
||||
do {
|
||||
// aa = a.prevSetBit(aa-1);
|
||||
aa--;
|
||||
while ((aa >= 0) && (! a.get(aa))) {
|
||||
aa--;
|
||||
}
|
||||
if (b.length() == 0) {
|
||||
bb = -1;
|
||||
} else if (bb > b.length()-1) {
|
||||
bb = b.prevSetBit(b.length()-1);
|
||||
} else if (bb < 1) {
|
||||
bb = -1;
|
||||
} else {
|
||||
bb = bb >= 1 ? b.prevSetBit(bb-1) : -1;
|
||||
}
|
||||
assertEquals(aa,bb);
|
||||
} while (aa>=0);
|
||||
}
|
||||
|
||||
// test interleaving different FixedBitSetIterator.next()/skipTo()
|
||||
void doIterate(BitSet a, FixedBitSet b, int mode) throws IOException {
|
||||
if (mode==1) doIterate1(a, b);
|
||||
if (mode==2) doIterate2(a, b);
|
||||
}
|
||||
|
||||
void doIterate1(BitSet a, FixedBitSet b) throws IOException {
|
||||
int aa=-1,bb=-1;
|
||||
DocIdSetIterator iterator = b.iterator();
|
||||
do {
|
||||
aa = a.nextSetBit(aa+1);
|
||||
bb = (bb < b.length() && random.nextBoolean()) ? iterator.nextDoc() : iterator.advance(bb + 1);
|
||||
assertEquals(aa == -1 ? DocIdSetIterator.NO_MORE_DOCS : aa, bb);
|
||||
} while (aa>=0);
|
||||
}
|
||||
|
||||
void doIterate2(BitSet a, FixedBitSet b) throws IOException {
|
||||
int aa=-1,bb=-1;
|
||||
DocIdSetIterator iterator = b.iterator();
|
||||
do {
|
||||
aa = a.nextSetBit(aa+1);
|
||||
bb = random.nextBoolean() ? iterator.nextDoc() : iterator.advance(bb + 1);
|
||||
assertEquals(aa == -1 ? DocIdSetIterator.NO_MORE_DOCS : aa, bb);
|
||||
} while (aa>=0);
|
||||
}
|
||||
|
||||
void doRandomSets(int maxSize, int iter, int mode) throws IOException {
|
||||
BitSet a0=null;
|
||||
FixedBitSet b0=null;
|
||||
|
||||
for (int i=0; i<iter; i++) {
|
||||
int sz = _TestUtil.nextInt(random, 2, maxSize);
|
||||
BitSet a = new BitSet(sz);
|
||||
FixedBitSet b = new FixedBitSet(sz);
|
||||
|
||||
// test the various ways of setting bits
|
||||
if (sz>0) {
|
||||
int nOper = random.nextInt(sz);
|
||||
for (int j=0; j<nOper; j++) {
|
||||
int idx;
|
||||
|
||||
idx = random.nextInt(sz);
|
||||
a.set(idx);
|
||||
b.set(idx);
|
||||
|
||||
idx = random.nextInt(sz);
|
||||
a.clear(idx);
|
||||
b.clear(idx);
|
||||
|
||||
idx = random.nextInt(sz);
|
||||
a.flip(idx);
|
||||
b.flip(idx, idx+1);
|
||||
|
||||
idx = random.nextInt(sz);
|
||||
a.flip(idx);
|
||||
b.flip(idx, idx+1);
|
||||
|
||||
boolean val2 = b.get(idx);
|
||||
boolean val = b.getAndSet(idx);
|
||||
assertTrue(val2 == val);
|
||||
assertTrue(b.get(idx));
|
||||
|
||||
if (!val) b.clear(idx);
|
||||
assertTrue(b.get(idx) == val);
|
||||
}
|
||||
}
|
||||
|
||||
// test that the various ways of accessing the bits are equivalent
|
||||
doGet(a,b);
|
||||
|
||||
// test ranges, including possible extension
|
||||
int fromIndex, toIndex;
|
||||
fromIndex = random.nextInt(sz/2);
|
||||
toIndex = fromIndex + random.nextInt(sz - fromIndex);
|
||||
BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
|
||||
FixedBitSet bb = (FixedBitSet)b.clone(); bb.flip(fromIndex,toIndex);
|
||||
|
||||
doIterate(aa,bb, mode); // a problem here is from flip or doIterate
|
||||
|
||||
fromIndex = random.nextInt(sz/2);
|
||||
toIndex = fromIndex + random.nextInt(sz - fromIndex);
|
||||
aa = (BitSet)a.clone(); aa.clear(fromIndex,toIndex);
|
||||
bb = (FixedBitSet)b.clone(); bb.clear(fromIndex,toIndex);
|
||||
|
||||
doNextSetBit(aa,bb); // a problem here is from clear() or nextSetBit
|
||||
|
||||
doPrevSetBit(aa,bb);
|
||||
|
||||
fromIndex = random.nextInt(sz/2);
|
||||
toIndex = fromIndex + random.nextInt(sz - fromIndex);
|
||||
aa = (BitSet)a.clone(); aa.set(fromIndex,toIndex);
|
||||
bb = (FixedBitSet)b.clone(); bb.set(fromIndex,toIndex);
|
||||
|
||||
doNextSetBit(aa,bb); // a problem here is from set() or nextSetBit
|
||||
|
||||
doPrevSetBit(aa,bb);
|
||||
|
||||
if (b0 != null && b0.length() <= b.length()) {
|
||||
assertEquals(a.cardinality(), b.cardinality());
|
||||
|
||||
BitSet a_or = (BitSet) a.clone();
|
||||
a_or.or(a0);
|
||||
|
||||
FixedBitSet b_or = (FixedBitSet) b.clone();
|
||||
b_or.or(b0);
|
||||
|
||||
assertEquals(a0.cardinality(), b0.cardinality());
|
||||
assertEquals(a_or.cardinality(), b_or.cardinality());
|
||||
|
||||
doIterate(a_or, b_or, mode);
|
||||
}
|
||||
|
||||
a0=a;
|
||||
b0=b;
|
||||
}
|
||||
}
|
||||
|
||||
// large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
|
||||
// larger testsuite.
|
||||
public void testSmall() throws IOException {
|
||||
doRandomSets(atLeast(1200), atLeast(1000), 1);
|
||||
doRandomSets(atLeast(1200), atLeast(1000), 2);
|
||||
}
|
||||
|
||||
// uncomment to run a bigger test (~2 minutes).
|
||||
/*
|
||||
public void testBig() {
|
||||
doRandomSets(2000,200000, 1);
|
||||
doRandomSets(2000,200000, 2);
|
||||
}
|
||||
*/
|
||||
|
||||
public void testEquals() {
|
||||
final int numBits = random.nextInt(2000);
|
||||
FixedBitSet b1 = new FixedBitSet(numBits);
|
||||
FixedBitSet b2 = new FixedBitSet(numBits);
|
||||
assertTrue(b1.equals(b2));
|
||||
assertTrue(b2.equals(b1));
|
||||
for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
|
||||
int idx = random.nextInt(numBits);
|
||||
if (!b1.get(idx)) {
|
||||
b1.set(idx);
|
||||
assertFalse(b1.equals(b2));
|
||||
assertFalse(b2.equals(b1));
|
||||
b2.set(idx);
|
||||
assertTrue(b1.equals(b2));
|
||||
assertTrue(b2.equals(b1));
|
||||
}
|
||||
}
|
||||
|
||||
// try different type of object
|
||||
assertFalse(b1.equals(new Object()));
|
||||
}
|
||||
|
||||
public void testHashCodeEquals() {
|
||||
final int numBits = random.nextInt(2000);
|
||||
FixedBitSet b1 = new FixedBitSet(numBits);
|
||||
FixedBitSet b2 = new FixedBitSet(numBits);
|
||||
assertTrue(b1.equals(b2));
|
||||
assertTrue(b2.equals(b1));
|
||||
for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
|
||||
int idx = random.nextInt(numBits);
|
||||
if (!b1.get(idx)) {
|
||||
b1.set(idx);
|
||||
assertFalse(b1.equals(b2));
|
||||
assertFalse(b1.hashCode() == b2.hashCode());
|
||||
b2.set(idx);
|
||||
assertEquals(b1, b2);
|
||||
assertEquals(b1.hashCode(), b2.hashCode());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private FixedBitSet makeFixedBitSet(int[] a, int numBits) {
|
||||
FixedBitSet bs = new FixedBitSet(numBits);
|
||||
for (int e: a) {
|
||||
bs.set(e);
|
||||
}
|
||||
return bs;
|
||||
}
|
||||
|
||||
private BitSet makeBitSet(int[] a) {
|
||||
BitSet bs = new BitSet();
|
||||
for (int e: a) {
|
||||
bs.set(e);
|
||||
}
|
||||
return bs;
|
||||
}
|
||||
|
||||
private void checkPrevSetBitArray(int [] a, int numBits) {
|
||||
FixedBitSet obs = makeFixedBitSet(a, numBits);
|
||||
BitSet bs = makeBitSet(a);
|
||||
doPrevSetBit(bs, obs);
|
||||
}
|
||||
|
||||
public void testPrevSetBit() {
|
||||
checkPrevSetBitArray(new int[] {}, 0);
|
||||
checkPrevSetBitArray(new int[] {0}, 1);
|
||||
checkPrevSetBitArray(new int[] {0,2}, 3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -184,6 +184,7 @@ public class TestNumericUtils extends LuceneTestCase {
|
|||
private void assertLongRangeSplit(final long lower, final long upper, int precisionStep,
|
||||
final boolean useBitSet, final Iterable<Long> expectedBounds, final Iterable<Integer> expectedShifts
|
||||
) throws Exception {
|
||||
// Cannot use FixedBitSet since the range could be long:
|
||||
final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
|
||||
final Iterator<Long> neededBounds = (expectedBounds == null) ? null : expectedBounds.iterator();
|
||||
final Iterator<Integer> neededShifts = (expectedShifts == null) ? null : expectedShifts.iterator();
|
||||
|
@ -212,7 +213,7 @@ public class TestNumericUtils extends LuceneTestCase {
|
|||
if (useBitSet) {
|
||||
// after flipping all bits in the range, the cardinality should be zero
|
||||
bits.flip(0,upper-lower+1);
|
||||
assertTrue("The sub-range concenated should match the whole range", bits.isEmpty());
|
||||
assertEquals("The sub-range concenated should match the whole range", 0, bits.cardinality());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -424,7 +425,7 @@ public class TestNumericUtils extends LuceneTestCase {
|
|||
private void assertIntRangeSplit(final int lower, final int upper, int precisionStep,
|
||||
final boolean useBitSet, final Iterable<Integer> expectedBounds, final Iterable<Integer> expectedShifts
|
||||
) throws Exception {
|
||||
final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
|
||||
final FixedBitSet bits=useBitSet ? new FixedBitSet(upper-lower+1) : null;
|
||||
final Iterator<Integer> neededBounds = (expectedBounds == null) ? null : expectedBounds.iterator();
|
||||
final Iterator<Integer> neededShifts = (expectedShifts == null) ? null : expectedShifts.iterator();
|
||||
|
||||
|
@ -451,8 +452,8 @@ public class TestNumericUtils extends LuceneTestCase {
|
|||
|
||||
if (useBitSet) {
|
||||
// after flipping all bits in the range, the cardinality should be zero
|
||||
bits.flip(0,upper-lower+1);
|
||||
assertTrue("The sub-range concenated should match the whole range", bits.isEmpty());
|
||||
bits.flip(0, upper-lower+1);
|
||||
assertEquals("The sub-range concenated should match the whole range", 0, bits.cardinality());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -88,7 +88,6 @@ public class BlockGroupingCollector extends Collector {
|
|||
private int totalGroupCount;
|
||||
private int docBase;
|
||||
private int groupEndDocID;
|
||||
//private OpenBitSet lastDocPerGroupBits;
|
||||
private DocIdSetIterator lastDocPerGroupBits;
|
||||
private Scorer scorer;
|
||||
private final GroupQueue groupQueue;
|
||||
|
|
|
@ -35,7 +35,7 @@ import org.apache.lucene.search.Scorer;
|
|||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.grouping.TopGroups;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* This query requires that you index
|
||||
|
@ -45,7 +45,7 @@ import org.apache.lucene.util.OpenBitSet;
|
|||
* child documents must appear first, ending with the parent
|
||||
* document. At search time you provide a Filter
|
||||
* identifying the parents, however this Filter must provide
|
||||
* an {@link OpenBitSet} per sub-reader.
|
||||
* a {@link FixedBitSet} per sub-reader.
|
||||
*
|
||||
* <p>Once the block index is built, use this query to wrap
|
||||
* any sub-query matching only child docs and join matches in that
|
||||
|
@ -165,11 +165,11 @@ public class BlockJoinQuery extends Query {
|
|||
// No matches
|
||||
return null;
|
||||
}
|
||||
if (!(parents instanceof OpenBitSet)) {
|
||||
throw new IllegalStateException("parentFilter must return OpenBitSet; got " + parents);
|
||||
if (!(parents instanceof FixedBitSet)) {
|
||||
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
|
||||
}
|
||||
|
||||
return new BlockJoinScorer(this, childScorer, (OpenBitSet) parents, firstChildDoc, scoreMode);
|
||||
return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -187,7 +187,7 @@ public class BlockJoinQuery extends Query {
|
|||
|
||||
static class BlockJoinScorer extends Scorer {
|
||||
private final Scorer childScorer;
|
||||
private final OpenBitSet parentBits;
|
||||
private final FixedBitSet parentBits;
|
||||
private final ScoreMode scoreMode;
|
||||
private int parentDoc;
|
||||
private float parentScore;
|
||||
|
@ -197,7 +197,7 @@ public class BlockJoinQuery extends Query {
|
|||
private float[] pendingChildScores;
|
||||
private int childDocUpto;
|
||||
|
||||
public BlockJoinScorer(Weight weight, Scorer childScorer, OpenBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
|
||||
public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
|
||||
super(weight);
|
||||
//System.out.println("Q.init firstChildDoc=" + firstChildDoc);
|
||||
this.parentBits = parentBits;
|
||||
|
|
Loading…
Reference in New Issue