SOLR-14256: Remove HashDocSet; add DocSet.getBits.

* DocSet is now fixed at two implementations; all DocSets are read-only and in ascending doc-id order.
* DocSetBase removed; it is no longer needed.  DocSet is now an abstract class.
David Smiley 2020-02-15 00:08:21 -05:00
parent 25892271e8
commit 50a7075862
19 changed files with 155 additions and 916 deletions

View File

@ -37,6 +37,9 @@ Other Changes
* SOLR-14258: DocList no longer extends DocSet. (David Smiley)
* SOLR-14256: Remove HashDocSet; add DocSet.getBits() instead. DocSet is now strictly immutable and ascending order.
It's now locked-down to external extension; only 2 impls exist. (David Smiley)
================== 8.5.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
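Since this entry changes a public contract, a minimal before/after sketch of caller code may help; the class, method, and candidates array below are hypothetical, not part of this commit:

import org.apache.lucene.util.Bits;
import org.apache.solr.search.DocSet;

class GetBitsMigrationSketch {
  // Before: callers probed docSet.exists(docid) per document, which is not
  // guaranteed to be fast (SortedIntDocSet binary-searches). After: ask once
  // for a random-access Bits view and probe that.
  static int countMatches(DocSet docSet, int[] candidates) {
    Bits bits = docSet.getBits(); // may do work to build; hoist out of hot loops
    int count = 0;
    for (int docid : candidates) {
      if (bits.get(docid)) { // replaces docSet.exists(docid)
        count++;
      }
    }
    return count;
  }
}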

View File

@ -62,7 +62,6 @@ import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSlice;
@ -324,23 +323,8 @@ public class TaggerRequestHandler extends RequestHandlerBase {
}
final DocSet docSet = searcher.getDocSet(filterQueries);//hopefully in the cache
//note: before Solr 4.7 we could call docSet.getBits() but no longer.
if (docSet instanceof BitDocSet) {
docBits = ((BitDocSet)docSet).getBits();
} else {
docBits = new Bits() {
@Override
public boolean get(int index) {
return docSet.exists(index);
}
@Override
public int length() {
return searcher.maxDoc();
}
};
}
docBits = docSet.getBits();
} else {
docBits = searcher.getSlowAtomicReader().getLiveDocs();
}

View File

@ -58,6 +58,7 @@ import org.apache.lucene.search.grouping.AllGroupHeadsCollector;
import org.apache.lucene.search.grouping.AllGroupsCollector;
import org.apache.lucene.search.grouping.TermGroupFacetCollector;
import org.apache.lucene.search.grouping.TermGroupSelector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.StringHelper;
@ -83,12 +84,10 @@ import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
import org.apache.solr.search.Grouping;
import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.Insanity;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortedIntDocSet;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.search.facet.FacetRequest;
@ -962,10 +961,11 @@ public class SimpleFacets {
int minDfFilterCache = global.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);
// make sure we have a set that is fast for random access, if we will use it for that
DocSet fastForRandomSet = docs;
if (minDfFilterCache>0 && docs instanceof SortedIntDocSet) {
SortedIntDocSet sset = (SortedIntDocSet)docs;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
Bits fastForRandomSet;
if (minDfFilterCache <= 0) {
fastForRandomSet = null;
} else {
fastForRandomSet = docs.getBits();
}
IndexSchema schema = searcher.getSchema();
@ -1064,7 +1064,7 @@ public class SimpleFacets {
int base = sub.slice.start;
int docid;
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base)) {
if (fastForRandomSet.get(docid + base)) {
c++;
if (intersectsCheck) {
assert c==1;
@ -1076,7 +1076,7 @@ public class SimpleFacets {
} else {
int docid;
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) {
if (fastForRandomSet.get(docid)) {
c++;
if (intersectsCheck) {
assert c==1;
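A condensed sketch of the control flow in the hunk above; the explicit null guard is this sketch's own addition (the real code guards this path via minDfFilterCache elsewhere):

import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.solr.search.DocSet;

class MinDfSketch {
  static int countInSet(DocSet docs, PostingsEnum postingsEnum, int minDfFilterCache) throws IOException {
    // Build the random-access view only when the postings-probing path can
    // use it; null mirrors the minDfFilterCache <= 0 case above.
    Bits fastForRandomSet = (minDfFilterCache <= 0) ? null : docs.getBits();
    int c = 0;
    if (fastForRandomSet != null) {
      int docid;
      while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (fastForRandomSet.get(docid)) c++;
      }
    }
    return c;
  }
}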

View File

@ -32,17 +32,18 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/**
* <code>BitDocSet</code> represents an unordered set of Lucene Document Ids
* using a BitSet. A set bit represents inclusion in the set for that document.
* A {@link FixedBitSet} based implementation of a {@link DocSet}. Good for medium/large sets.
*
* @since solr 0.9
*/
public class BitDocSet extends DocSetBase {
public class BitDocSet extends DocSet {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BitDocSet.class)
+ RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class)
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // for the array object inside the FixedBitSet. long[] array won't change alignment, so no need to calculate it.
final FixedBitSet bits;
// TODO consider SparseFixedBitSet alternative
private final FixedBitSet bits;
int size; // number of docs in the set (cached for perf)
public BitDocSet() {
@ -67,35 +68,6 @@ public class BitDocSet extends DocSetBase {
this.size = size;
}
/* DocIterator using nextSetBit()
public DocIterator iterator() {
return new DocIterator() {
int pos=bits.nextSetBit(0);
public boolean hasNext() {
return pos>=0;
}
public Integer next() {
return nextDoc();
}
public void remove() {
bits.clear(pos);
}
public int nextDoc() {
int old=pos;
pos=bits.nextSetBit(old+1);
return old;
}
public float score() {
return 0.0f;
}
};
}
***/
@Override
public DocIterator iterator() {
return new DocIterator() {
@ -139,15 +111,13 @@ public class BitDocSet extends DocSetBase {
}
@Override
public void add(int doc) {
bits.set(doc);
size=-1; // invalidate size
protected FixedBitSet getFixedBitSet() {
return bits;
}
@Override
public void addUnique(int doc) {
bits.set(doc);
size=-1; // invalidate size
protected FixedBitSet getFixedBitSetClone() {
return bits.clone();
}
@Override
@ -156,14 +126,6 @@ public class BitDocSet extends DocSetBase {
return size = bits.cardinality();
}
/**
* The number of set bits - size - is cached. If the bitset is changed externally,
* this method should be used to invalidate the previously cached size.
*/
public void invalidateSize() {
size=-1;
}
/**
* Returns true if the doc exists in the set. Should only be called when doc &lt;
* {@link FixedBitSet#length()}.
@ -173,6 +135,20 @@ public class BitDocSet extends DocSetBase {
return bits.get(doc);
}
@Override
public DocSet intersection(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersection(this);
}
// Default... handle with bitsets.
FixedBitSet newbits = getFixedBitSetClone();
newbits.and(other.getFixedBitSet());
return new BitDocSet(newbits);
}
@Override
public int intersectionSize(DocSet other) {
if (other instanceof BitDocSet) {
@ -217,12 +193,8 @@ public class BitDocSet extends DocSetBase {
}
@Override
public void addAllTo(DocSet target) {
if (target instanceof BitDocSet) {
((BitDocSet) target).bits.or(bits);
} else {
super.addAllTo(target);
}
public void addAllTo(FixedBitSet target) {
target.or(bits);
}
@Override
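The new intersection() dispatches to the (typically smaller) non-BitDocSet operand and only falls back to a bitset AND when both sides are bitsets. A standalone sketch of that fallback, assuming both sets share one doc-id space:

import org.apache.lucene.util.FixedBitSet;

class BitsetIntersectSketch {
  // Clone-then-and: never mutate the receiver, since cached DocSets are shared.
  // FixedBitSet.and() zeroes words past the shorter operand, so differing
  // lengths are safe.
  static FixedBitSet intersect(FixedBitSet a, FixedBitSet b) {
    FixedBitSet result = a.clone();
    result.and(b);
    return result;
  }
}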

View File

@ -17,119 +17,116 @@
package org.apache.solr.search;
import org.apache.lucene.util.Accountable;
import org.apache.solr.common.SolrException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
/**
* <code>DocSet</code> represents an unordered set of Lucene Document Ids.
* An immutable ordered set of Lucene Document Ids.
* It's similar to a Lucene {@link org.apache.lucene.search.DocIdSet}.
*
* <p>
* WARNING: Any DocSet returned from SolrIndexSearcher should <b>not</b> be modified as it may have been retrieved from
* a cache and could be shared.
* </p>
*
* @since solr 0.9
*/
public interface DocSet extends Accountable, Cloneable /* extends Collection<Integer> */ {
/**
* Adds the specified document if it is not currently in the DocSet
* (optional operation).
*
* @see #addUnique
* @throws SolrException if the implementation does not allow modifications
*/
public void add(int doc);
public abstract class DocSet implements Accountable, Cloneable /* extends Collection<Integer> */ {
/**
* Adds a document the caller knows is not currently in the DocSet
* (optional operation).
*
* <p>
* This method may be faster than <code>add(doc)</code> in some
* implementations provided the caller is certain of the precondition.
* </p>
*
* @see #add
* @throws SolrException if the implementation does not allow modifications
*/
public void addUnique(int doc);
// package accessible; guarantee known implementations
DocSet() {
assert this instanceof BitDocSet || this instanceof SortedIntDocSet;
}
/**
* Returns the number of documents in the set.
*/
public int size();
public abstract int size();
/**
* Returns true if a document is in the DocSet.
* If you want to be guaranteed fast random access, use {@link #getBits()} instead.
*/
public boolean exists(int docid);
public abstract boolean exists(int docid);
/**
* Returns an iterator that may be used to iterate over all of the documents in the set.
*
* <p>
* The order of the documents returned by this iterator is
* non-deterministic, and any scoring information is meaningless
* </p>
* Returns an ordered iterator of the documents in the set. Any scoring information is meaningless.
*/
public DocIterator iterator();
//TODO switch to DocIdSetIterator in Solr 9?
public abstract DocIterator iterator();
/**
* Returns the intersection of this set with another set. Neither set is modified - a new DocSet is
* created and returned.
* @return a DocSet representing the intersection
*/
public DocSet intersection(DocSet other);
public abstract DocSet intersection(DocSet other);
/**
* Returns the number of documents of the intersection of this set with another set.
* May be more efficient than actually creating the intersection and then getting its size.
*/
public int intersectionSize(DocSet other);
public abstract int intersectionSize(DocSet other);
/** Returns true if these sets have any elements in common */
public boolean intersects(DocSet other);
public abstract boolean intersects(DocSet other);
/**
* Returns the union of this set with another set. Neither set is modified - a new DocSet is
* created and returned.
* @return a DocSet representing the union
*/
public DocSet union(DocSet other);
public abstract DocSet union(DocSet other);
/**
* Returns the number of documents of the union of this set with another set.
* May be more efficient than actually creating the union and then getting its size.
*/
public int unionSize(DocSet other);
public int unionSize(DocSet other) {
return this.size() + other.size() - this.intersectionSize(other);
}
/**
* Returns the documents in this set that are not in the other set. Neither set is modified - a new DocSet is
* created and returned.
* @return a DocSet representing this AND NOT other
*/
public DocSet andNot(DocSet other);
public abstract DocSet andNot(DocSet other);
/**
* Returns the number of documents in this set that are not in the other set.
*/
public int andNotSize(DocSet other);
public int andNotSize(DocSet other) {
return this.size() - this.intersectionSize(other);
}
/**
* Returns a Filter for use in Lucene search methods, assuming this DocSet
* was generated from the top-level MultiReader that the Lucene search
* methods will be invoked with.
*/
public Filter getTopFilter();
public abstract Filter getTopFilter();
/**
* Adds all the docs from this set to the target set. The target should be
* Adds all the docs from this set to the target. The target should be
* sized large enough to accommodate all of the documents before calling this
* method.
*/
public void addAllTo(DocSet target);
public abstract void addAllTo(FixedBitSet target);
public DocSet clone();
public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
public abstract DocSet clone();
public static final DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
/**
* A {@link Bits} that has fast random access (as is generally required of Bits).
* It may be necessary to do work to build this.
*/
public abstract Bits getBits();
// internal only
protected abstract FixedBitSet getFixedBitSet();
// internal only
protected abstract FixedBitSet getFixedBitSetClone();
}
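The package-private constructor plus assertion is how DocSet gets "locked-down to external extension" on a pre-sealed-classes JDK. A generic sketch of the idiom, with hypothetical names:

public abstract class SealedBase {
  SealedBase() { // package-private: only same-package classes can extend
    assert this instanceof ArrayImpl || this instanceof BitsetImpl;
  }
}
final class ArrayImpl extends SealedBase {}
final class BitsetImpl extends SealedBase {}

Note the assert only fires with -ea enabled; the constructor's visibility is what actually enforces the restriction at compile time.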

View File

@ -1,266 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.Objects;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
/** A base class that may be useful for implementing DocSets */
abstract class DocSetBase implements DocSet {
public static FixedBitSet toBitSet(DocSet set) {
if (set instanceof DocSetBase) {
return ((DocSetBase) set).getBits();
} else {
FixedBitSet bits = new FixedBitSet(64);
for (DocIterator iter = set.iterator(); iter.hasNext();) {
int nextDoc = iter.nextDoc();
bits = FixedBitSet.ensureCapacity(bits, nextDoc);
bits.set(nextDoc);
}
return bits;
}
}
// Not implemented efficiently... for testing purposes only
@Override
public boolean equals(Object obj) {
if (!(obj instanceof DocSet)) return false;
DocSet other = (DocSet)obj;
if (this.size() != other.size()) return false;
if (this instanceof DocList && other instanceof DocList) {
// compare ordering
DocIterator i1=this.iterator();
DocIterator i2=other.iterator();
while(i1.hasNext() && i2.hasNext()) {
if (i1.nextDoc() != i2.nextDoc()) return false;
}
return true;
// don't compare matches
}
FixedBitSet bs1 = this.getBits();
FixedBitSet bs2 = toBitSet(other);
// resize both BitSets to make sure they have the same amount of zero padding
int maxNumBits = bs1.length() > bs2.length() ? bs1.length() : bs2.length();
bs1 = FixedBitSet.ensureCapacity(bs1, maxNumBits);
bs2 = FixedBitSet.ensureCapacity(bs2, maxNumBits);
// if (this.size() != other.size()) return false;
return bs1.equals(bs2);
}
public DocSet clone() {
throw new RuntimeException(new CloneNotSupportedException());
}
/**
* @throws SolrException Base implementation does not allow modifications
*/
@Override
public void add(int doc) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,"Unsupported Operation");
}
/**
* @throws SolrException Base implementation does not allow modifications
*/
@Override
public void addUnique(int doc) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,"Unsupported Operation");
}
/**
* Return a {@link FixedBitSet} with a bit set for every document in this
* {@link DocSet}. The default implementation iterates on all docs and sets
* the relevant bits. You should override if you can provide a more efficient
* implementation.
*/
protected FixedBitSet getBits() {
FixedBitSet bits = new FixedBitSet(size());
for (DocIterator iter = iterator(); iter.hasNext();) {
int nextDoc = iter.nextDoc();
bits = FixedBitSet.ensureCapacity(bits, nextDoc);
bits.set(nextDoc);
}
return bits;
}
@Override
public DocSet intersection(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersection(this);
}
// Default... handle with bitsets.
FixedBitSet newbits = getBits().clone();
newbits.and(toBitSet(other));
return new BitDocSet(newbits);
}
@Override
public boolean intersects(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersects(this);
}
// less efficient way: get the intersection size
return intersectionSize(other) > 0;
}
@Override
public DocSet union(DocSet other) {
FixedBitSet otherBits = toBitSet(other);
FixedBitSet newbits = FixedBitSet.ensureCapacity(getBits().clone(), otherBits.length());
newbits.or(otherBits);
return new BitDocSet(newbits);
}
@Override
public int intersectionSize(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersectionSize(this);
}
// less efficient way: do the intersection then get its size
return intersection(other).size();
}
@Override
public int unionSize(DocSet other) {
return this.size() + other.size() - this.intersectionSize(other);
}
@Override
public DocSet andNot(DocSet other) {
FixedBitSet newbits = getBits().clone();
newbits.andNot(toBitSet(other));
return new BitDocSet(newbits);
}
@Override
public int andNotSize(DocSet other) {
return this.size() - this.intersectionSize(other);
}
@Override
public Filter getTopFilter() {
return new Filter() {
final FixedBitSet bs = getBits();
@Override
public DocIdSet getDocIdSet(final LeafReaderContext context, Bits acceptDocs) {
LeafReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
if (context.isTopLevel) {
return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bs), acceptDocs);
}
final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
int pos=base-1;
int adjustedDoc=-1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() {
pos = bs.nextSetBit(pos+1); // TODO: this is buggy if getBits() returns a bitset that does not have a capacity of maxDoc
return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
}
@Override
public int advance(int target) {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
}
@Override
public long cost() {
return bs.length();
}
};
}
@Override
public long ramBytesUsed() {
return bs.ramBytesUsed();
}
@Override
public Bits bits() {
// sparse filters should not use random access
return null;
}
}, acceptDocs2);
}
@Override
public String toString(String field) {
return "DocSetTopFilter";
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
Objects.equals(bs, getClass().cast(other).bs);
}
@Override
public int hashCode() {
return classHash() ^ bs.hashCode();
}
};
}
@Override
public void addAllTo(DocSet target) {
DocIterator iter = iterator();
while (iter.hasNext()) {
target.add(iter.nextDoc());
}
}
}
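The removed getTopFilter() iterator above translates between top-level and per-segment doc ids. A compact sketch of just that translation, reusing the base/max arithmetic from the code above (the println is illustrative only):

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;

class SegmentTranslationSketch {
  // One segment's docs occupy [base, base + maxDoc) in the top-level id space;
  // walk the top-level bitset and report segment-local ids.
  static void emitSegmentDocs(FixedBitSet topLevelBits, int base, int maxDoc) {
    final int max = base + maxDoc; // one past the last doc in this segment
    int pos = base - 1;
    while (pos + 1 < topLevelBits.length()) { // nextSetBit requires index < length
      pos = topLevelBits.nextSetBit(pos + 1);
      if (pos == DocIdSetIterator.NO_MORE_DOCS || pos >= max) break;
      System.out.println("segment-local doc: " + (pos - base));
    }
  }
}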

View File

@ -556,8 +556,7 @@ abstract class PointSetQuery extends Query implements DocSetProducer, Accountabl
}
if (searcher instanceof SolrIndexSearcher) {
return ((SolrIndexSearcher) searcher).getLiveDocSet().getBits();
} else {
// TODO Does this ever happen? In Solr should always be SolrIndexSearcher?
} else { // could happen in Delete-by-query situation
//smallSetSize==0 thus will always produce a BitDocSet (FixedBitSet)
DocSetCollector docSetCollector = new DocSetCollector(0, searcher.getIndexReader().maxDoc());
searcher.search(new MatchAllDocsQuery(), docSetCollector);
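Pulled out on its own, the fallback path above looks roughly like this (assuming DocSetCollector.getDocSet() as the accessor, as used elsewhere in Solr):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.util.Bits;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetCollector;

class LiveDocsFallbackSketch {
  // smallSetSize == 0 forces a bitset-backed BitDocSet, so getBits() is cheap.
  static Bits collectAllDocBits(IndexSearcher searcher) throws IOException {
    DocSetCollector collector = new DocSetCollector(0, searcher.getIndexReader().maxDoc());
    searcher.search(new MatchAllDocsQuery(), collector);
    DocSet allDocs = collector.getDocSet();
    return allDocs.getBits();
  }
}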

View File

@ -1,310 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
* <code>HashDocSet</code> represents an unordered set of Lucene Document Ids
* using a primitive int hash table. It can be a better choice if there are few docs
* in the set because it takes up less memory and is faster to iterate and take
* set intersections.
*
*
* @since solr 0.9
*/
public final class HashDocSet extends DocSetBase {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(HashDocSet.class) + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
/** Default load factor to use for HashDocSets. We keep track of the inverse
* since multiplication is so much faster than division. The default
* is 1.0f / 0.75f
*/
static float DEFAULT_INVERSE_LOAD_FACTOR = 1.0f /0.75f;
// public final static int MAX_SIZE = SolrConfig.config.getInt("//HashDocSet/@maxSize",-1);
// lucene docs are numbered from 0, so a neg number must be used for missing.
// an alternative to having to init the array to EMPTY at the start is
//
private final static int EMPTY=-1;
private final int[] table;
private final int size;
private final int mask;
public HashDocSet(HashDocSet set) {
this.table = set.table.clone();
this.size = set.size;
this.mask = set.mask;
}
/** Create a HashDocSet from a list of *unique* ids */
public HashDocSet(int[] docs, int offset, int len) {
this(docs, offset, len, DEFAULT_INVERSE_LOAD_FACTOR);
}
/** Create a HashDocSet from a list of *unique* ids */
public HashDocSet(int[] docs, int offset, int len, float inverseLoadFactor) {
int tsize = Math.max(BitUtil.nextHighestPowerOfTwo(len), 1);
if (tsize < len * inverseLoadFactor) {
tsize <<= 1;
}
mask=tsize-1;
table = new int[tsize];
// (for now) better than: Arrays.fill(table, EMPTY);
// https://issues.apache.org/jira/browse/SOLR-390
for (int i=tsize-1; i>=0; i--) table[i]=EMPTY;
int end = offset + len;
for (int i=offset; i<end; i++) {
put(docs[i]);
}
size = len;
}
void put(int doc) {
int s = doc & mask;
while (table[s]!=EMPTY) {
// Adding an odd number to this power-of-two hash table is
// guaranteed to do a full traversal, so instead of re-hashing
// we jump straight to a "linear" traversal.
// The key is that we provide many different ways to do the
// traversal (tablesize/2) based on the last hash code (the doc).
// Rely on loop invariant code motion to eval ((doc>>7)|1) only once.
// otherwise, we would need to pull the first case out of the loop.
s = (s + ((doc>>7)|1)) & mask;
}
table[s]=doc;
}
@Override
public boolean exists(int doc) {
int s = doc & mask;
for(;;) {
int v = table[s];
if (v==EMPTY) return false;
if (v==doc) return true;
// see put() for algorithm details.
s = (s + ((doc>>7)|1)) & mask;
}
}
@Override
public int size() {
return size;
}
@Override
public DocIterator iterator() {
return new DocIterator() {
int pos=0;
int doc;
{ goNext(); }
@Override
public boolean hasNext() {
return pos < table.length;
}
@Override
public Integer next() {
return nextDoc();
}
@Override
public void remove() {
}
void goNext() {
while (pos<table.length && table[pos]==EMPTY) pos++;
}
// modify to return -1 at end of iteration?
@Override
public int nextDoc() {
int doc = table[pos];
pos++;
goNext();
return doc;
}
@Override
public float score() {
return 0.0f;
}
};
}
@Override
public DocSet intersection(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set for the most efficient
// intersection.
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;
int[] result = new int[a.size()];
int resultCount=0;
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id >= 0 && b.exists(id)) {
result[resultCount++]=id;
}
}
return new HashDocSet(result,0,resultCount);
} else {
int[] result = new int[size()];
int resultCount=0;
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && other.exists(id)) {
result[resultCount++]=id;
}
}
return new HashDocSet(result,0,resultCount);
}
}
@Override
public int intersectionSize(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set for the most efficient
// intersection.
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;
int resultCount=0;
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id >= 0 && b.exists(id)) {
resultCount++;
}
}
return resultCount;
} else {
int resultCount=0;
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && other.exists(id)) {
resultCount++;
}
}
return resultCount;
}
}
@Override
public boolean intersects(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set for the most efficient
// intersection.
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id >= 0 && b.exists(id)) {
return true;
}
}
return false;
} else {
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && other.exists(id)) {
return true;
}
}
return false;
}
}
@Override
public DocSet andNot(DocSet other) {
int[] result = new int[size()];
int resultCount=0;
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && !other.exists(id)) {
result[resultCount++]=id;
}
}
return new HashDocSet(result,0,resultCount);
}
@Override
public DocSet union(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;
int[] result = new int[a.size()+b.size()];
int resultCount=0;
// iterate over the largest table first, adding w/o checking.
for (int i=0; i<b.table.length; i++) {
int id=b.table[i];
if (id>=0) result[resultCount++]=id;
}
// now iterate over smaller set, adding all not already in larger set.
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id>=0 && !b.exists(id)) result[resultCount++]=id;
}
return new HashDocSet(result,0,resultCount);
} else {
return other.union(this);
}
}
@Override
public HashDocSet clone() {
return new HashDocSet(this);
}
// don't implement andNotSize() and unionSize() on purpose... they are implemented
// in BaseDocSet in terms of intersectionSize().
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + (table.length<<2);
}
@Override
public Collection<Accountable> getChildResources() {
return Collections.emptyList();
}
}
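The put() comments above claim the odd probe step guarantees a full table traversal. That holds because an odd step is coprime with a power-of-two table size; a tiny self-contained check of the claim:

class ProbeCycleSketch {
  // Repeatedly adding an odd step modulo a power of two visits every slot
  // exactly once before repeating.
  static boolean probeVisitsAllSlots(int tableSize, int start, int step) {
    assert Integer.bitCount(tableSize) == 1 && (step & 1) == 1;
    boolean[] seen = new boolean[tableSize];
    int mask = tableSize - 1;
    int s = start & mask;
    for (int i = 0; i < tableSize; i++) {
      seen[s] = true;
      s = (s + step) & mask;
    }
    for (boolean hit : seen) {
      if (!hit) return false;
    }
    return true;
  }

  public static void main(String[] args) {
    int doc = 12345;
    // the step HashDocSet would use for doc 12345: (12345 >> 7) | 1 == 97
    System.out.println(probeVisitsAllSlots(1024, doc & 1023, (doc >> 7) | 1)); // true
  }
}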

View File

@ -409,10 +409,11 @@ class JoinQuery extends Query {
List<DocSet> resultList = new ArrayList<>(10);
// make sure we have a set that is fast for random access, if we will use it for that
DocSet fastForRandomSet = fromSet;
if (minDocFreqFrom>0 && fromSet instanceof SortedIntDocSet) {
SortedIntDocSet sset = (SortedIntDocSet)fromSet;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
Bits fastForRandomSet;
if (minDocFreqFrom <= 0) {
fastForRandomSet = null;
} else {
fastForRandomSet = fromSet.getBits();
}
@ -480,7 +481,7 @@ class JoinQuery extends Query {
int base = sub.slice.start;
int docid;
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid+base)) {
if (fastForRandomSet.get(docid+base)) {
intersects = true;
break outer;
}
@ -489,7 +490,7 @@ class JoinQuery extends Query {
} else {
int docid;
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) {
if (fastForRandomSet.get(docid)) {
intersects = true;
break;
}
@ -521,10 +522,10 @@ class JoinQuery extends Query {
DocSet toTermSet = toSearcher.getDocSet(toDeState);
resultListDocs += toTermSet.size();
if (resultBits != null) {
toTermSet.addAllTo(new BitDocSet(resultBits));
toTermSet.addAllTo(resultBits);
} else {
if (toTermSet instanceof BitDocSet) {
resultBits = ((BitDocSet)toTermSet).bits.clone();
resultBits = ((BitDocSet)toTermSet).getBits().clone();
} else {
resultList.add(toTermSet);
}
@ -568,11 +569,10 @@ class JoinQuery extends Query {
smallSetsDeferred = resultList.size();
if (resultBits != null) {
BitDocSet bitSet = new BitDocSet(resultBits);
for (DocSet set : resultList) {
set.addAllTo(bitSet);
set.addAllTo(resultBits);
}
return bitSet;
return new BitDocSet(resultBits);
}
if (resultList.size()==0) {
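The accumulation pattern in this hunk, condensed into a standalone sketch: union many small DocSets into one maxDoc-sized FixedBitSet through the new addAllTo(FixedBitSet) signature, wrapping the result once at the end.

import java.util.List;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;

class UnionAccumulationSketch {
  static BitDocSet unionAll(List<DocSet> sets, int maxDoc) {
    FixedBitSet bits = new FixedBitSet(maxDoc);
    for (DocSet set : sets) {
      set.addAllTo(bits); // each set ORs itself into the shared bitset
    }
    return new BitDocSet(bits);
  }
}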

View File

@ -19,6 +19,7 @@ package org.apache.solr.search;
import java.util.Collection;
import java.util.Collections;
import com.carrotsearch.hppc.IntHashSet;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
@ -29,9 +30,9 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/**
* <code>SortedIntDocSet</code> represents a sorted set of Lucene Document Ids.
* A simple sorted int[] array implementation of {@link DocSet}, good for small sets.
*/
public class SortedIntDocSet extends DocSetBase {
public class SortedIntDocSet extends DocSet {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SortedIntDocSet.class) + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
protected final int[] docs;
@ -66,24 +67,6 @@ public class SortedIntDocSet extends DocSetBase {
return newArr;
}
/** Returns the index of the first non-sorted element or -1 if they are all sorted */
public static int firstNonSorted(int[] arr, int offset, int len) {
if (len <= 1) return -1;
int lower = arr[offset];
int end = offset + len;
for(int i=offset+1; i<end; i++) {
int next = arr[i];
if (next <= lower) {
for (int j=i-1; j>offset; j--) {
if (arr[j]<next) return j+1;
}
return offset;
}
lower = next;
}
return -1;
}
public static int intersectionSize(int[] smallerSortedList, int[] biggerSortedList) {
final int a[] = smallerSortedList;
final int b[] = biggerSortedList;
@ -222,8 +205,7 @@ public class SortedIntDocSet extends DocSetBase {
@Override
public int intersectionSize(DocSet other) {
if (!(other instanceof SortedIntDocSet)) {
// assume other implementations are better at random access than we are,
// true of BitDocSet and HashDocSet.
// BitDocSet is better at random access than we are
int icount = 0;
for (int i=0; i<docs.length; i++) {
if (other.exists(docs[i])) icount++;
@ -272,10 +254,9 @@ public class SortedIntDocSet extends DocSetBase {
@Override
public boolean intersects(DocSet other) {
if (!(other instanceof SortedIntDocSet)) {
// assume other implementations are better at random access than we are,
// true of BitDocSet and HashDocSet.
for (int i=0; i<docs.length; i++) {
if (other.exists(docs[i])) return true;
// assume BitDocSet is better at random access than we are
for (int doc : docs) {
if (other.exists(doc)) return true;
}
return false;
}
@ -561,9 +542,9 @@ public class SortedIntDocSet extends DocSetBase {
}
@Override
public void addAllTo(DocSet target) {
public void addAllTo(FixedBitSet target) {
for (int doc : docs) {
target.add(doc);
target.set(doc);
}
}
@ -590,7 +571,6 @@ public class SortedIntDocSet extends DocSetBase {
}
return false;
}
@Override
public DocIterator iterator() {
@ -627,13 +607,40 @@ public class SortedIntDocSet extends DocSetBase {
}
@Override
public FixedBitSet getBits() {
int maxDoc = size() > 0 ? docs[size()-1] : 0;
FixedBitSet bs = new FixedBitSet(maxDoc+1);
public Bits getBits() {
IntHashSet hashSet = new IntHashSet(docs.length);
for (int doc : docs) {
bs.set(doc);
hashSet.add(doc);
}
return bs;
return new Bits() {
@Override
public boolean get(int index) {
return hashSet.contains(index);
}
@Override
public int length() {
return getLength();
}
};
}
/** the {@link Bits#length()} or maxdoc (1 greater than largest possible doc number) */
private int getLength() {
return size() == 0 ? 0 : getDocs()[size() - 1] + 1;
}
@Override
protected FixedBitSet getFixedBitSet() {
return getFixedBitSetClone();
}
@Override
protected FixedBitSet getFixedBitSetClone() {
FixedBitSet bitSet = new FixedBitSet(getLength());
addAllTo(bitSet);
return bitSet;
}
public static int findIndex(int[] arr, int value, int low, int high) {
@ -655,6 +662,15 @@ public class SortedIntDocSet extends DocSetBase {
return low;
}
@Override
public DocSet union(DocSet other) {
// TODO could be more efficient if both are SortedIntDocSet
FixedBitSet otherBits = other.getFixedBitSet();
FixedBitSet newbits = FixedBitSet.ensureCapacity(getFixedBitSetClone(), otherBits.length());
newbits.or(otherBits);
return new BitDocSet(newbits);
}
@Override
public Filter getTopFilter() {
return new Filter() {
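The hash-backed Bits view introduced above gives O(1) membership for a sparse sorted id array without allocating a maxDoc-sized bitset. The same idea as a standalone helper, condensed from the methods above:

import com.carrotsearch.hppc.IntHashSet;
import org.apache.lucene.util.Bits;

class HashBitsSketch {
  static Bits asBits(int[] sortedDocs) {
    final IntHashSet set = new IntHashSet(sortedDocs.length);
    for (int doc : sortedDocs) {
      set.add(doc);
    }
    // Bits.length() contract: one greater than the largest possible doc id
    final int length = sortedDocs.length == 0 ? 0 : sortedDocs[sortedDocs.length - 1] + 1;
    return new Bits() {
      @Override
      public boolean get(int index) {
        return set.contains(index);
      }

      @Override
      public int length() {
        return length;
      }
    };
  }
}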

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.SolrException;
@ -39,9 +40,7 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortedIntDocSet;
import org.apache.solr.search.facet.SlotAcc.SlotContext;
/**
@ -57,7 +56,7 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
boolean hasSubFacets; // true if there are subfacets
int minDfFilterCache;
DocSet docs;
DocSet fastForRandomSet;
Bits fastForRandomSet;
TermsEnum termsEnum = null;
SolrIndexSearcher.DocsEnumState deState = null;
PostingsEnum postingsEnum;
@ -265,11 +264,7 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
// lazy convert to fastForRandomSet
if (fastForRandomSet == null) {
fastForRandomSet = docs;
if (docs instanceof SortedIntDocSet) { // OFF-HEAP todo: also check for native version
SortedIntDocSet sset = (SortedIntDocSet) docs;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
}
fastForRandomSet = docs.getBits();
}
// iterate over TermDocs to calculate the intersection
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
@ -285,12 +280,12 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
if (countOnly) {
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base)) c++;
if (fastForRandomSet.get(docid + base)) c++;
}
} else {
setNextReader(leaves[sub.slice.readerIndex]);
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base)) {
if (fastForRandomSet.get(docid + base)) {
c++;
collect(docid, 0, slotContext);
}
@ -302,12 +297,12 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
int docid;
if (countOnly) {
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) c++;
if (fastForRandomSet.get(docid)) c++;
}
} else {
setNextReader(leaves[0]);
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) {
if (fastForRandomSet.get(docid)) {
c++;
collect(docid, 0, slotContext);
}

View File

@ -38,7 +38,6 @@ import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
@ -47,8 +46,6 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.RptWithGeometrySpatialField;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.DistanceUnits;
@ -259,15 +256,8 @@ public class FacetHeatmap extends FacetRequest {
return null; // means match everything (all live docs). This can speedup things a lot.
} else if (docSet.size() == 0) {
return new Bits.MatchNoBits(searcher.maxDoc()); // can speedup things a lot
} else if (docSet instanceof BitDocSet) {
return ((BitDocSet) docSet).getBits();
} else {
// TODO DocSetBase.getBits ought to be at DocSet level? Though it doesn't know maxDoc but it could?
FixedBitSet bits = new FixedBitSet(searcher.maxDoc());
for (DocIterator iter = docSet.iterator(); iter.hasNext();) {
bits.set(iter.nextDoc());
}
return bits;
return docSet.getBits();
}
}

View File

@ -162,11 +162,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins.
-->
<requestHandler name="/select" class="solr.SearchHandler">

View File

@ -162,10 +162,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the 'qt' param matching the
name of registered handlers.

View File

@ -176,10 +176,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins
-->
<requestHandler name="/select" class="solr.SearchHandler">

View File

@ -161,10 +161,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the 'qt' param matching the
name of registered handlers.

View File

@ -175,10 +175,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins
-->
<requestHandler name="/select" class="solr.SearchHandler">

View File

@ -35,7 +35,6 @@ public class DocSetPerf {
static FixedBitSet bs;
static BitDocSet bds;
static HashDocSet hds;
static int[] ids; // not unique
static Random rand = getRandom();
@ -59,7 +58,6 @@ public class DocSetPerf {
}
}
bds = new BitDocSet(bs,bitsToSet);
hds = new HashDocSet(ids,0,count);
}
public static void main(String[] args) {
@ -81,13 +79,11 @@ public class DocSetPerf {
FixedBitSet[] sets = new FixedBitSet[numSets];
DocSet[] bset = new DocSet[numSets];
DocSet[] hset = new DocSet[numSets];
for (int i=0; i<numSets; i++) {
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);
sets[i] = bs;
bset[i] = bds;
hset[i] = hds;
}
final RTimer timer = new RTimer();
@ -97,7 +93,6 @@ public class DocSetPerf {
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);
FixedBitSet bs1=bs;
BitDocSet bds1=bds;
HashDocSet hds1=hds;
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);
FixedBitSet res = bs1.clone();
@ -106,17 +101,6 @@ public class DocSetPerf {
test(bds1.intersection(bds).size() == icount);
test(bds1.intersectionSize(bds) == icount);
if (bds1.intersection(hds).size() != icount) {
DocSet ds = bds1.intersection(hds);
System.out.println("STOP");
}
test(bds1.intersection(hds).size() == icount);
test(bds1.intersectionSize(hds) == icount);
test(hds1.intersection(bds).size() == icount);
test(hds1.intersectionSize(bds) == icount);
test(hds1.intersection(hds).size() == icount);
test(hds1.intersectionSize(hds) == icount);
ret += icount;
}
@ -126,7 +110,6 @@ public class DocSetPerf {
String oper=null;
if (test.endsWith("B")) { type="B"; }
if (test.endsWith("H")) { type="H"; }
if (test.endsWith("M")) { type="M"; }
if (test.startsWith("intersect")) oper="intersect";
if (test.startsWith("intersectSize")) oper="intersectSize";
@ -141,14 +124,10 @@ public class DocSetPerf {
if (type=="B") {
a=bset[idx1]; b=bset[idx2];
} else if (type=="H") {
a=hset[idx1]; b=bset[idx2];
} else if (type=="M") {
if (idx1 < idx2) {
a=bset[idx1];
b=hset[idx2];
} else {
a=hset[idx1];
b=bset[idx2];
}
}

View File

@ -50,7 +50,6 @@ import org.apache.solr.SolrTestCase;
*/
public class TestDocSet extends SolrTestCase {
Random rand;
float loadfactor;
@Override
public void setUp() throws Exception {
@ -89,15 +88,6 @@ public class TestDocSet extends SolrTestCase {
return bs;
}
public DocSet getHashDocSet(FixedBitSet bs) {
int[] docs = new int[bs.cardinality()];
BitSetIterator iter = new BitSetIterator(bs, 0);
for (int i=0; i<docs.length; i++) {
docs[i] = iter.nextDoc();
}
return new HashDocSet(docs,0,docs.length);
}
public DocSet getIntDocSet(FixedBitSet bs) {
int[] docs = new int[bs.cardinality()];
BitSetIterator iter = new BitSetIterator(bs, 0);
@ -130,11 +120,7 @@ public class TestDocSet extends SolrTestCase {
public DocSet getDocSet(FixedBitSet bs) {
switch(rand.nextInt(9)) {
case 0: return getHashDocSet(bs);
case 1: return getBitDocSet(bs);
case 2: return getBitDocSet(bs);
case 3: return getBitDocSet(bs);
case 0: case 1: case 2: case 3: return getBitDocSet(bs);
case 4: return getIntDocSet(bs);
case 5: return getIntDocSet(bs);
@ -153,8 +139,6 @@ public class TestDocSet extends SolrTestCase {
}
public void iter(DocSet d1, DocSet d2) {
// HashDocSet and DocList doesn't iterate in order.
if (d1 instanceof HashDocSet || d2 instanceof HashDocSet || d1 instanceof DocList || d2 instanceof DocList) return;
DocIterator i1 = d1.iterator();
DocIterator i2 = d2.iterator();
@ -235,9 +219,6 @@ public class TestDocSet extends SolrTestCase {
if (smallSetType ==0) {
Arrays.sort(a);
return new SortedIntDocSet(a);
} else if (smallSetType ==1) {
Arrays.sort(a);
return loadfactor!=0 ? new HashDocSet(a,0,n,1/loadfactor) : new HashDocSet(a,0,n);
}
}
@ -258,41 +239,11 @@ public class TestDocSet extends SolrTestCase {
return sets;
}
/* needs code insertion into HashDocSet
public void testCollisions() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=256;
int iter=1;
int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
int ret=0;
long start=System.currentTimeMillis();
for (int maxDoc : maxDocs) {
int cstart = HashDocSet.collisions;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
for (DocSet s1 : sets) {
for (DocSet s2 : sets) {
if (s1!=s2) ret += s1.intersectionSize(s2);
}
}
int cend = HashDocSet.collisions;
System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
}
long end=System.currentTimeMillis();
System.out.println("testIntersectionSizePerformance="+(end-start)+" ms");
if (ret==-1)System.out.println("wow!");
System.out.println("collisions="+HashDocSet.collisions);
}
***/
public static int smallSetType = 0; // 0==sortedint, 1==hash, 2==FixedBitSet
public static int smallSetType = 0; // 0==sortedint, 2==FixedBitSet
public static int smallSetCuttoff=3000;
/*
public void testIntersectionSizePerformance() {
loadfactor=.75f; // for HashDocSet
rand=new Random(1); // make deterministic
int minBigSetSize=1,maxBigSetSize=30000;
@ -323,56 +274,6 @@ public class TestDocSet extends SolrTestCase {
}
***/
/*
public void testExistsPerformance() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=512;
int iter=1;
int maxDoc=1000000;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
int ret=0;
long start=System.currentTimeMillis();
for (int i=0; i<iter; i++) {
for (DocSet s1 : sets) {
for (int j=0; j<maxDoc; j++) {
ret += s1.exists(j) ? 1 :0;
}
}
}
long end=System.currentTimeMillis();
System.out.println("testExistsSizePerformance="+(end-start)+" ms");
if (ret==-1)System.out.println("wow!");
}
***/
/* needs code insertion into HashDocSet
public void testExistsCollisions() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=512;
int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
int ret=0;
for (int maxDoc : maxDocs) {
int mask = (BitUtil.nextHighestPowerOfTwo(maxDoc)>>1)-1;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
int cstart = HashDocSet.collisions;
for (DocSet s1 : sets) {
for (int j=0; j<maxDocs[0]; j++) {
int idx = rand.nextInt()&mask;
ret += s1.exists(idx) ? 1 :0;
}
}
int cend = HashDocSet.collisions;
System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
}
if (ret==-1)System.out.println("wow!");
System.out.println("collisions="+HashDocSet.collisions);
}
***/
public LeafReader dummyIndexReader(final int maxDoc) {
return new LeafReader() {