mirror of https://github.com/apache/lucene.git
SOLR-14256: Remove HashDocSet; add DocSet.getBits.

* DocSet is now fixed at two implementations: read-only and doc-ordered.
* DocSetBase removed; not needed anymore. DocSet is now an abstract class.

This commit is contained in:
parent 25892271e8
commit 50a7075862
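Before the diff, for orientation: the recurring migration in this commit is to stop wrapping a SortedIntDocSet in a HashDocSet for random access and to call the new DocSet.getBits() instead, doing membership tests through Lucene's Bits interface. A minimal sketch of a hypothetical caller (the class and method names are illustrative, not from this patch; DocSet.getBits(), Bits.get(int) and the replaced DocSet.exists(int) are the real APIs):

    import org.apache.lucene.util.Bits;
    import org.apache.solr.search.DocSet;

    class GetBitsMigrationSketch {
      // Counts how many candidate doc ids are members of the set.
      static int countMatches(DocSet docs, int[] candidateDocIds) {
        // Before SOLR-14256: fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
        // After: any DocSet hands out a view that is fast for random access.
        Bits fastForRandomSet = docs.getBits();
        int count = 0;
        for (int docid : candidateDocIds) {
          if (fastForRandomSet.get(docid)) { // was: docs.exists(docid)
            count++;
          }
        }
        return count;
      }
    }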
@@ -37,6 +37,9 @@ Other Changes

* SOLR-14258: DocList no longer extends DocSet. (David Smiley)

* SOLR-14256: Remove HashDocSet; add DocSet.getBits() instead. DocSet is now strictly immutable and ascending order.
  It's now locked-down to external extension; only 2 impls exist. (David Smiley)

================== 8.5.0 ==================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@@ -62,7 +62,6 @@ import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSlice;

@@ -324,23 +323,8 @@ public class TaggerRequestHandler extends RequestHandlerBase {
}

final DocSet docSet = searcher.getDocSet(filterQueries);//hopefully in the cache
//note: before Solr 4.7 we could call docSet.getBits() but no longer.
if (docSet instanceof BitDocSet) {
docBits = ((BitDocSet)docSet).getBits();
} else {
docBits = new Bits() {

@Override
public boolean get(int index) {
return docSet.exists(index);
}

@Override
public int length() {
return searcher.maxDoc();
}
};
}
docBits = docSet.getBits();
} else {
docBits = searcher.getSlowAtomicReader().getLiveDocs();
}
@@ -58,6 +58,7 @@ import org.apache.lucene.search.grouping.AllGroupHeadsCollector;
import org.apache.lucene.search.grouping.AllGroupsCollector;
import org.apache.lucene.search.grouping.TermGroupFacetCollector;
import org.apache.lucene.search.grouping.TermGroupSelector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.StringHelper;

@@ -83,12 +84,10 @@ import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
import org.apache.solr.search.Grouping;
import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.Insanity;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortedIntDocSet;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.search.facet.FacetRequest;

@@ -962,10 +961,11 @@ public class SimpleFacets {
int minDfFilterCache = global.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

// make sure we have a set that is fast for random access, if we will use it for that
DocSet fastForRandomSet = docs;
if (minDfFilterCache>0 && docs instanceof SortedIntDocSet) {
SortedIntDocSet sset = (SortedIntDocSet)docs;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
Bits fastForRandomSet;
if (minDfFilterCache <= 0) {
fastForRandomSet = null;
} else {
fastForRandomSet = docs.getBits();
}

IndexSchema schema = searcher.getSchema();

@@ -1064,7 +1064,7 @@ public class SimpleFacets {
int base = sub.slice.start;
int docid;
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base)) {
if (fastForRandomSet.get(docid + base)) {
c++;
if (intersectsCheck) {
assert c==1;

@@ -1076,7 +1076,7 @@ public class SimpleFacets {
} else {
int docid;
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) {
if (fastForRandomSet.get(docid)) {
c++;
if (intersectsCheck) {
assert c==1;
@@ -32,17 +32,18 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;

/**
* <code>BitDocSet</code> represents an unordered set of Lucene Document Ids
* using a BitSet. A set bit represents inclusion in the set for that document.
* A {@link FixedBitSet} based implementation of a {@link DocSet}. Good for medium/large sets.
*
* @since solr 0.9
*/
public class BitDocSet extends DocSetBase {
public class BitDocSet extends DocSet {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BitDocSet.class)
+ RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class)
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // for the array object inside the FixedBitSet. long[] array won't change alignment, so no need to calculate it.

final FixedBitSet bits;
// TODO consider SparseFixedBitSet alternative

private final FixedBitSet bits;
int size; // number of docs in the set (cached for perf)

public BitDocSet() {

@@ -67,35 +68,6 @@ public class BitDocSet extends DocSetBase {
this.size = size;
}

/* DocIterator using nextSetBit()
public DocIterator iterator() {
return new DocIterator() {
int pos=bits.nextSetBit(0);
public boolean hasNext() {
return pos>=0;
}

public Integer next() {
return nextDoc();
}

public void remove() {
bits.clear(pos);
}

public int nextDoc() {
int old=pos;
pos=bits.nextSetBit(old+1);
return old;
}

public float score() {
return 0.0f;
}
};
}
***/

@Override
public DocIterator iterator() {
return new DocIterator() {

@@ -139,15 +111,13 @@ public class BitDocSet extends DocSetBase {
}

@Override
public void add(int doc) {
bits.set(doc);
size=-1; // invalidate size
protected FixedBitSet getFixedBitSet() {
return bits;
}

@Override
public void addUnique(int doc) {
bits.set(doc);
size=-1; // invalidate size
protected FixedBitSet getFixedBitSetClone() {
return bits.clone();
}

@Override

@@ -156,14 +126,6 @@ public class BitDocSet extends DocSetBase {
return size = bits.cardinality();
}

/**
* The number of set bits - size - is cached. If the bitset is changed externally,
* this method should be used to invalidate the previously cached size.
*/
public void invalidateSize() {
size=-1;
}

/**
* Returns true of the doc exists in the set. Should only be called when doc <
* {@link FixedBitSet#length()}.

@@ -173,6 +135,20 @@ public class BitDocSet extends DocSetBase {
return bits.get(doc);
}

@Override
public DocSet intersection(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersection(this);
}

// Default... handle with bitsets.
FixedBitSet newbits = getFixedBitSetClone();
newbits.and(other.getFixedBitSet());
return new BitDocSet(newbits);
}

@Override
public int intersectionSize(DocSet other) {
if (other instanceof BitDocSet) {

@@ -217,12 +193,8 @@ public class BitDocSet extends DocSetBase {
}

@Override
public void addAllTo(DocSet target) {
if (target instanceof BitDocSet) {
((BitDocSet) target).bits.or(bits);
} else {
super.addAllTo(target);
}
public void addAllTo(FixedBitSet target) {
target.or(bits);
}

@Override
@@ -17,119 +17,116 @@
package org.apache.solr.search;

import org.apache.lucene.util.Accountable;
import org.apache.solr.common.SolrException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

/**
* <code>DocSet</code> represents an unordered set of Lucene Document Ids.
* An immutable ordered set of Lucene Document Ids.
* It's similar to a Lucene {@link org.apache.lucene.search.DocIdSet}.
*
* <p>
* WARNING: Any DocSet returned from SolrIndexSearcher should <b>not</b> be modified as it may have been retrieved from
* a cache and could be shared.
* </p>
*
* @since solr 0.9
*/
public interface DocSet extends Accountable, Cloneable /* extends Collection<Integer> */ {

/**
* Adds the specified document if it is not currently in the DocSet
* (optional operation).
*
* @see #addUnique
* @throws SolrException if the implementation does not allow modifications
*/
public void add(int doc);
public abstract class DocSet implements Accountable, Cloneable /* extends Collection<Integer> */ {

/**
* Adds a document the caller knows is not currently in the DocSet
* (optional operation).
*
* <p>
* This method may be faster then <code>add(doc)</code> in some
* implementations provided the caller is certain of the precondition.
* </p>
*
* @see #add
* @throws SolrException if the implementation does not allow modifications
*/
public void addUnique(int doc);
// package accessible; guarantee known implementations
DocSet() {
assert this instanceof BitDocSet || this instanceof SortedIntDocSet;
}

/**
* Returns the number of documents in the set.
*/
public int size();
public abstract int size();

/**
* Returns true if a document is in the DocSet.
* If you want to be guaranteed fast random access, use {@link #getBits()} instead.
*/
public boolean exists(int docid);
public abstract boolean exists(int docid);

/**
* Returns an iterator that may be used to iterate over all of the documents in the set.
*
* <p>
* The order of the documents returned by this iterator is
* non-deterministic, and any scoring information is meaningless
* </p>
* Returns an ordered iterator of the documents in the set. Any scoring information is meaningless.
*/
public DocIterator iterator();
//TODO switch to DocIdSetIterator in Solr 9?
public abstract DocIterator iterator();

/**
* Returns the intersection of this set with another set. Neither set is modified - a new DocSet is
* created and returned.
* @return a DocSet representing the intersection
*/
public DocSet intersection(DocSet other);
public abstract DocSet intersection(DocSet other);

/**
* Returns the number of documents of the intersection of this set with another set.
* May be more efficient than actually creating the intersection and then getting its size.
*/
public int intersectionSize(DocSet other);
public abstract int intersectionSize(DocSet other);

/** Returns true if these sets have any elements in common */
public boolean intersects(DocSet other);
public abstract boolean intersects(DocSet other);

/**
* Returns the union of this set with another set. Neither set is modified - a new DocSet is
* created and returned.
* @return a DocSet representing the union
*/
public DocSet union(DocSet other);
public abstract DocSet union(DocSet other);

/**
* Returns the number of documents of the union of this set with another set.
* May be more efficient than actually creating the union and then getting its size.
*/
public int unionSize(DocSet other);
public int unionSize(DocSet other) {
return this.size() + other.size() - this.intersectionSize(other);
}

/**
* Returns the documents in this set that are not in the other set. Neither set is modified - a new DocSet is
* created and returned.
* @return a DocSet representing this AND NOT other
*/
public DocSet andNot(DocSet other);
public abstract DocSet andNot(DocSet other);

/**
* Returns the number of documents in this set that are not in the other set.
*/
public int andNotSize(DocSet other);
public int andNotSize(DocSet other) {
return this.size() - this.intersectionSize(other);
}

/**
* Returns a Filter for use in Lucene search methods, assuming this DocSet
* was generated from the top-level MultiReader that the Lucene search
* methods will be invoked with.
*/
public Filter getTopFilter();
public abstract Filter getTopFilter();

/**
* Adds all the docs from this set to the target set. The target should be
* Adds all the docs from this set to the target. The target should be
* sized large enough to accommodate all of the documents before calling this
* method.
*/
public void addAllTo(DocSet target);
public abstract void addAllTo(FixedBitSet target);

public DocSet clone();

public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
public abstract DocSet clone();

public static final DocSet EMPTY = new SortedIntDocSet(new int[0], 0);

/**
* A {@link Bits} that has fast random access (as is generally required of Bits).
* It may be necessary to do work to build this.
*/
public abstract Bits getBits();

// internal only
protected abstract FixedBitSet getFixedBitSet();

// internal only
protected abstract FixedBitSet getFixedBitSetClone();

}
@@ -1,266 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;

import java.util.Objects;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;

/** A base class that may be useful for implementing DocSets */
abstract class DocSetBase implements DocSet {

public static FixedBitSet toBitSet(DocSet set) {
if (set instanceof DocSetBase) {
return ((DocSetBase) set).getBits();
} else {
FixedBitSet bits = new FixedBitSet(64);
for (DocIterator iter = set.iterator(); iter.hasNext();) {
int nextDoc = iter.nextDoc();
bits = FixedBitSet.ensureCapacity(bits, nextDoc);
bits.set(nextDoc);
}
return bits;
}
}

// Not implemented efficiently... for testing purposes only
@Override
public boolean equals(Object obj) {
if (!(obj instanceof DocSet)) return false;
DocSet other = (DocSet)obj;
if (this.size() != other.size()) return false;

if (this instanceof DocList && other instanceof DocList) {
// compare ordering
DocIterator i1=this.iterator();
DocIterator i2=other.iterator();
while(i1.hasNext() && i2.hasNext()) {
if (i1.nextDoc() != i2.nextDoc()) return false;
}
return true;
// don't compare matches
}

FixedBitSet bs1 = this.getBits();
FixedBitSet bs2 = toBitSet(other);

// resize both BitSets to make sure they have the same amount of zero padding

int maxNumBits = bs1.length() > bs2.length() ? bs1.length() : bs2.length();
bs1 = FixedBitSet.ensureCapacity(bs1, maxNumBits);
bs2 = FixedBitSet.ensureCapacity(bs2, maxNumBits);

// if (this.size() != other.size()) return false;
return bs1.equals(bs2);
}

public DocSet clone() {
throw new RuntimeException(new CloneNotSupportedException());
}

/**
* @throws SolrException Base implementation does not allow modifications
*/
@Override
public void add(int doc) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,"Unsupported Operation");
}

/**
* @throws SolrException Base implementation does not allow modifications
*/
@Override
public void addUnique(int doc) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,"Unsupported Operation");
}

/**
* Return a {@link FixedBitSet} with a bit set for every document in this
* {@link DocSet}. The default implementation iterates on all docs and sets
* the relevant bits. You should override if you can provide a more efficient
* implementation.
*/
protected FixedBitSet getBits() {
FixedBitSet bits = new FixedBitSet(size());
for (DocIterator iter = iterator(); iter.hasNext();) {
int nextDoc = iter.nextDoc();
bits = FixedBitSet.ensureCapacity(bits, nextDoc);
bits.set(nextDoc);
}
return bits;
}

@Override
public DocSet intersection(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersection(this);
}

// Default... handle with bitsets.
FixedBitSet newbits = getBits().clone();
newbits.and(toBitSet(other));
return new BitDocSet(newbits);
}

@Override
public boolean intersects(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersects(this);
}
// less efficient way: get the intersection size
return intersectionSize(other) > 0;
}

@Override
public DocSet union(DocSet other) {
FixedBitSet otherBits = toBitSet(other);
FixedBitSet newbits = FixedBitSet.ensureCapacity(getBits().clone(), otherBits.length());
newbits.or(otherBits);
return new BitDocSet(newbits);
}

@Override
public int intersectionSize(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersectionSize(this);
}
// less efficient way: do the intersection then get its size
return intersection(other).size();
}

@Override
public int unionSize(DocSet other) {
return this.size() + other.size() - this.intersectionSize(other);
}

@Override
public DocSet andNot(DocSet other) {
FixedBitSet newbits = getBits().clone();
newbits.andNot(toBitSet(other));
return new BitDocSet(newbits);
}

@Override
public int andNotSize(DocSet other) {
return this.size() - this.intersectionSize(other);
}

@Override
public Filter getTopFilter() {
return new Filter() {
final FixedBitSet bs = getBits();

@Override
public DocIdSet getDocIdSet(final LeafReaderContext context, Bits acceptDocs) {
LeafReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);

if (context.isTopLevel) {
return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bs), acceptDocs);
}

final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.

return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
int pos=base-1;
int adjustedDoc=-1;

@Override
public int docID() {
return adjustedDoc;
}

@Override
public int nextDoc() {
pos = bs.nextSetBit(pos+1); // TODO: this is buggy if getBits() returns a bitset that does not have a capacity of maxDoc
return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
}

@Override
public int advance(int target) {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
}

@Override
public long cost() {
return bs.length();
}
};
}

@Override
public long ramBytesUsed() {
return bs.ramBytesUsed();
}

@Override
public Bits bits() {
// sparse filters should not use random access
return null;
}

}, acceptDocs2);
}

@Override
public String toString(String field) {
return "DocSetTopFilter";
}

@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
Objects.equals(bs, getClass().cast(other).bs);
}

@Override
public int hashCode() {
return classHash() ^ bs.hashCode();
}
};
}

@Override
public void addAllTo(DocSet target) {
DocIterator iter = iterator();
while (iter.hasNext()) {
target.add(iter.nextDoc());
}
}

}
@@ -556,8 +556,7 @@ abstract class PointSetQuery extends Query implements DocSetProducer, Accountabl
}
if (searcher instanceof SolrIndexSearcher) {
return ((SolrIndexSearcher) searcher).getLiveDocSet().getBits();
} else {
// TODO Does this ever happen? In Solr should always be SolrIndexSearcher?
} else { // could happen in Delete-by-query situation
//smallSetSize==0 thus will always produce a BitDocSet (FixedBitSet)
DocSetCollector docSetCollector = new DocSetCollector(0, searcher.getIndexReader().maxDoc());
searcher.search(new MatchAllDocsQuery(), docSetCollector);
@@ -1,310 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;

import java.util.Collection;
import java.util.Collections;

import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.RamUsageEstimator;

/**
* <code>HashDocSet</code> represents an unordered set of Lucene Document Ids
* using a primitive int hash table. It can be a better choice if there are few docs
* in the set because it takes up less memory and is faster to iterate and take
* set intersections.
*
*
* @since solr 0.9
*/
public final class HashDocSet extends DocSetBase {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(HashDocSet.class) + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;

/** Default load factor to use for HashDocSets. We keep track of the inverse
* since multiplication is so much faster than division. The default
* is 1.0f / 0.75f
*/
static float DEFAULT_INVERSE_LOAD_FACTOR = 1.0f /0.75f;

// public final static int MAX_SIZE = SolrConfig.config.getInt("//HashDocSet/@maxSize",-1);

// lucene docs are numbered from 0, so a neg number must be used for missing.
// an alternative to having to init the array to EMPTY at the start is
//
private final static int EMPTY=-1;
private final int[] table;
private final int size;
private final int mask;

public HashDocSet(HashDocSet set) {
this.table = set.table.clone();
this.size = set.size;
this.mask = set.mask;
}

/** Create a HashDocSet from a list of *unique* ids */
public HashDocSet(int[] docs, int offset, int len) {
this(docs, offset, len, DEFAULT_INVERSE_LOAD_FACTOR);
}

/** Create a HashDocSet from a list of *unique* ids */
public HashDocSet(int[] docs, int offset, int len, float inverseLoadFactor) {
int tsize = Math.max(BitUtil.nextHighestPowerOfTwo(len), 1);
if (tsize < len * inverseLoadFactor) {
tsize <<= 1;
}

mask=tsize-1;

table = new int[tsize];
// (for now) better then: Arrays.fill(table, EMPTY);
// https://issues.apache.org/jira/browse/SOLR-390
for (int i=tsize-1; i>=0; i--) table[i]=EMPTY;

int end = offset + len;
for (int i=offset; i<end; i++) {
put(docs[i]);
}

size = len;
}

void put(int doc) {
int s = doc & mask;
while (table[s]!=EMPTY) {
// Adding an odd number to this power-of-two hash table is
// guaranteed to do a full traversal, so instead of re-hashing
// we jump straight to a "linear" traversal.
// The key is that we provide many different ways to do the
// traversal (tablesize/2) based on the last hash code (the doc).
// Rely on loop invariant code motion to eval ((doc>>7)|1) only once.
// otherwise, we would need to pull the first case out of the loop.
s = (s + ((doc>>7)|1)) & mask;
}
table[s]=doc;
}

@Override
public boolean exists(int doc) {
int s = doc & mask;
for(;;) {
int v = table[s];
if (v==EMPTY) return false;
if (v==doc) return true;
// see put() for algorithm details.
s = (s + ((doc>>7)|1)) & mask;
}
}

@Override
public int size() {
return size;
}

@Override
public DocIterator iterator() {
return new DocIterator() {
int pos=0;
int doc;
{ goNext(); }

@Override
public boolean hasNext() {
return pos < table.length;
}

@Override
public Integer next() {
return nextDoc();
}

@Override
public void remove() {
}

void goNext() {
while (pos<table.length && table[pos]==EMPTY) pos++;
}

// modify to return -1 at end of iteration?
@Override
public int nextDoc() {
int doc = table[pos];
pos++;
goNext();
return doc;
}

@Override
public float score() {
return 0.0f;
}
};
}

@Override
public DocSet intersection(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set for the most efficient
// intersection.
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;

int[] result = new int[a.size()];
int resultCount=0;
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id >= 0 && b.exists(id)) {
result[resultCount++]=id;
}
}
return new HashDocSet(result,0,resultCount);

} else {

int[] result = new int[size()];
int resultCount=0;
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && other.exists(id)) {
result[resultCount++]=id;
}
}
return new HashDocSet(result,0,resultCount);
}

}

@Override
public int intersectionSize(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set for the most efficient
// intersection.
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;

int resultCount=0;
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id >= 0 && b.exists(id)) {
resultCount++;
}
}
return resultCount;
} else {
int resultCount=0;
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && other.exists(id)) {
resultCount++;
}
}
return resultCount;
}

}

@Override
public boolean intersects(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set for the most efficient
// intersection.
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;

for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id >= 0 && b.exists(id)) {
return true;
}
}
return false;
} else {
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && other.exists(id)) {
return true;
}
}
return false;
}
}

@Override
public DocSet andNot(DocSet other) {
int[] result = new int[size()];
int resultCount=0;

for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && !other.exists(id)) {
result[resultCount++]=id;
}
}
return new HashDocSet(result,0,resultCount);
}

@Override
public DocSet union(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;

int[] result = new int[a.size()+b.size()];
int resultCount=0;
// iterate over the largest table first, adding w/o checking.
for (int i=0; i<b.table.length; i++) {
int id=b.table[i];
if (id>=0) result[resultCount++]=id;
}

// now iterate over smaller set, adding all not already in larger set.
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id>=0 && !b.exists(id)) result[resultCount++]=id;
}

return new HashDocSet(result,0,resultCount);
} else {
return other.union(this);
}
}

@Override
public HashDocSet clone() {
return new HashDocSet(this);
}

// don't implement andNotSize() and unionSize() on purpose... they are implemented
// in BaseDocSet in terms of intersectionSize().

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + (table.length<<2);
}

@Override
public Collection<Accountable> getChildResources() {
return Collections.emptyList();
}
}
@@ -409,10 +409,11 @@ class JoinQuery extends Query {
List<DocSet> resultList = new ArrayList<>(10);

// make sure we have a set that is fast for random access, if we will use it for that
DocSet fastForRandomSet = fromSet;
if (minDocFreqFrom>0 && fromSet instanceof SortedIntDocSet) {
SortedIntDocSet sset = (SortedIntDocSet)fromSet;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
Bits fastForRandomSet;
if (minDocFreqFrom <= 0) {
fastForRandomSet = null;
} else {
fastForRandomSet = fromSet.getBits();
}

@@ -480,7 +481,7 @@ class JoinQuery extends Query {
int base = sub.slice.start;
int docid;
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid+base)) {
if (fastForRandomSet.get(docid+base)) {
intersects = true;
break outer;
}

@@ -489,7 +490,7 @@ class JoinQuery extends Query {
} else {
int docid;
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) {
if (fastForRandomSet.get(docid)) {
intersects = true;
break;
}

@@ -521,10 +522,10 @@ class JoinQuery extends Query {
DocSet toTermSet = toSearcher.getDocSet(toDeState);
resultListDocs += toTermSet.size();
if (resultBits != null) {
toTermSet.addAllTo(new BitDocSet(resultBits));
toTermSet.addAllTo(resultBits);
} else {
if (toTermSet instanceof BitDocSet) {
resultBits = ((BitDocSet)toTermSet).bits.clone();
resultBits = ((BitDocSet)toTermSet).getBits().clone();
} else {
resultList.add(toTermSet);
}

@@ -568,11 +569,10 @@ class JoinQuery extends Query {
smallSetsDeferred = resultList.size();

if (resultBits != null) {
BitDocSet bitSet = new BitDocSet(resultBits);
for (DocSet set : resultList) {
set.addAllTo(bitSet);
set.addAllTo(resultBits);
}
return bitSet;
return new BitDocSet(resultBits);
}

if (resultList.size()==0) {
@@ -19,6 +19,7 @@ package org.apache.solr.search;
import java.util.Collection;
import java.util.Collections;

import com.carrotsearch.hppc.IntHashSet;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;

@@ -29,9 +30,9 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;

/**
* <code>SortedIntDocSet</code> represents a sorted set of Lucene Document Ids.
* A simple sorted int[] array implementation of {@link DocSet}, good for small sets.
*/
public class SortedIntDocSet extends DocSetBase {
public class SortedIntDocSet extends DocSet {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SortedIntDocSet.class) + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;

protected final int[] docs;

@@ -66,24 +67,6 @@ public class SortedIntDocSet extends DocSetBase {
return newArr;
}

/** Returns the index of the first non-sorted element or -1 if they are all sorted */
public static int firstNonSorted(int[] arr, int offset, int len) {
if (len <= 1) return -1;
int lower = arr[offset];
int end = offset + len;
for(int i=offset+1; i<end; i++) {
int next = arr[i];
if (next <= lower) {
for (int j=i-1; j>offset; j--) {
if (arr[j]<next) return j+1;
}
return offset;
}
lower = next;
}
return -1;
}

public static int intersectionSize(int[] smallerSortedList, int[] biggerSortedList) {
final int a[] = smallerSortedList;
final int b[] = biggerSortedList;

@@ -222,8 +205,7 @@ public class SortedIntDocSet extends DocSetBase {
@Override
public int intersectionSize(DocSet other) {
if (!(other instanceof SortedIntDocSet)) {
// assume other implementations are better at random access than we are,
// true of BitDocSet and HashDocSet.
// BitDocSet is better at random access than we are
int icount = 0;
for (int i=0; i<docs.length; i++) {
if (other.exists(docs[i])) icount++;

@@ -272,10 +254,9 @@ public class SortedIntDocSet extends DocSetBase {
@Override
public boolean intersects(DocSet other) {
if (!(other instanceof SortedIntDocSet)) {
// assume other implementations are better at random access than we are,
// true of BitDocSet and HashDocSet.
for (int i=0; i<docs.length; i++) {
if (other.exists(docs[i])) return true;
// assume BitDocSet is better at random access than we are
for (int doc : docs) {
if (other.exists(doc)) return true;
}
return false;
}

@@ -561,9 +542,9 @@ public class SortedIntDocSet extends DocSetBase {
}

@Override
public void addAllTo(DocSet target) {
public void addAllTo(FixedBitSet target) {
for (int doc : docs) {
target.add(doc);
target.set(doc);
}
}

@@ -590,7 +571,6 @@ public class SortedIntDocSet extends DocSetBase {
}
return false;
}


@Override
public DocIterator iterator() {

@@ -627,13 +607,40 @@ public class SortedIntDocSet extends DocSetBase {
}

@Override
public FixedBitSet getBits() {
int maxDoc = size() > 0 ? docs[size()-1] : 0;
FixedBitSet bs = new FixedBitSet(maxDoc+1);
public Bits getBits() {
IntHashSet hashSet = new IntHashSet(docs.length);
for (int doc : docs) {
bs.set(doc);
hashSet.add(doc);
}
return bs;

return new Bits() {
@Override
public boolean get(int index) {
return hashSet.contains(index);
}

@Override
public int length() {
return getLength();
}
};
}

/** the {@link Bits#length()} or maxdoc (1 greater than largest possible doc number) */
private int getLength() {
return size() == 0 ? 0 : getDocs()[size() - 1] + 1;
}

@Override
protected FixedBitSet getFixedBitSet() {
return getFixedBitSetClone();
}

@Override
protected FixedBitSet getFixedBitSetClone() {
FixedBitSet bitSet = new FixedBitSet(getLength());
addAllTo(bitSet);
return bitSet;
}

public static int findIndex(int[] arr, int value, int low, int high) {

@@ -655,6 +662,15 @@ public class SortedIntDocSet extends DocSetBase {
return low;
}

@Override
public DocSet union(DocSet other) {
// TODO could be more efficient if both are SortedIntDocSet
FixedBitSet otherBits = other.getFixedBitSet();
FixedBitSet newbits = FixedBitSet.ensureCapacity(getFixedBitSetClone(), otherBits.length());
newbits.or(otherBits);
return new BitDocSet(newbits);
}

@Override
public Filter getTopFilter() {
return new Filter() {
@@ -32,6 +32,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.SolrException;

@@ -39,9 +40,7 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortedIntDocSet;
import org.apache.solr.search.facet.SlotAcc.SlotContext;

/**

@@ -57,7 +56,7 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
boolean hasSubFacets; // true if there are subfacets
int minDfFilterCache;
DocSet docs;
DocSet fastForRandomSet;
Bits fastForRandomSet;
TermsEnum termsEnum = null;
SolrIndexSearcher.DocsEnumState deState = null;
PostingsEnum postingsEnum;

@@ -265,11 +264,7 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement

// lazy convert to fastForRandomSet
if (fastForRandomSet == null) {
fastForRandomSet = docs;
if (docs instanceof SortedIntDocSet) { // OFF-HEAP todo: also check for native version
SortedIntDocSet sset = (SortedIntDocSet) docs;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
}
fastForRandomSet = docs.getBits();
}
// iterate over TermDocs to calculate the intersection
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);

@@ -285,12 +280,12 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement

if (countOnly) {
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base)) c++;
if (fastForRandomSet.get(docid + base)) c++;
}
} else {
setNextReader(leaves[sub.slice.readerIndex]);
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base)) {
if (fastForRandomSet.get(docid + base)) {
c++;
collect(docid, 0, slotContext);
}

@@ -302,12 +297,12 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
int docid;
if (countOnly) {
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) c++;
if (fastForRandomSet.get(docid)) c++;
}
} else {
setNextReader(leaves[0]);
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) {
if (fastForRandomSet.get(docid)) {
c++;
collect(docid, 0, slotContext);
}
@@ -38,7 +38,6 @@ import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

@@ -47,8 +46,6 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.RptWithGeometrySpatialField;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.DistanceUnits;

@@ -259,15 +256,8 @@ public class FacetHeatmap extends FacetRequest {
return null; // means match everything (all live docs). This can speedup things a lot.
} else if (docSet.size() == 0) {
return new Bits.MatchNoBits(searcher.maxDoc()); // can speedup things a lot
} else if (docSet instanceof BitDocSet) {
return ((BitDocSet) docSet).getBits();
} else {
// TODO DocSetBase.getBits ought to be at DocSet level? Though it doesn't know maxDoc but it could?
FixedBitSet bits = new FixedBitSet(searcher.maxDoc());
for (DocIterator iter = docSet.iterator(); iter.hasNext();) {
bits.set(iter.nextDoc());
}
return bits;
return docSet.getBits();
}
}
@@ -162,11 +162,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />


<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->

<!-- requestHandler plugins.
-->
<requestHandler name="/select" class="solr.SearchHandler">
@@ -162,10 +162,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />

<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->

<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the 'qt' param matching the
name of registered handlers.
@@ -176,10 +176,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />

<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->

<!-- requestHandler plugins
-->
<requestHandler name="/select" class="solr.SearchHandler">
@@ -161,10 +161,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />

<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->

<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the 'qt' param matching the
name of registered handlers.
@@ -175,10 +175,6 @@
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />

<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->

<!-- requestHandler plugins
-->
<requestHandler name="/select" class="solr.SearchHandler">
@@ -35,7 +35,6 @@ public class DocSetPerf {

static FixedBitSet bs;
static BitDocSet bds;
static HashDocSet hds;
static int[] ids; // not unique

static Random rand = getRandom();

@@ -59,7 +58,6 @@ public class DocSetPerf {
}
}
bds = new BitDocSet(bs,bitsToSet);
hds = new HashDocSet(ids,0,count);
}

public static void main(String[] args) {

@@ -81,13 +79,11 @@ public class DocSetPerf {

FixedBitSet[] sets = new FixedBitSet[numSets];
DocSet[] bset = new DocSet[numSets];
DocSet[] hset = new DocSet[numSets];

for (int i=0; i<numSets; i++) {
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);
sets[i] = bs;
bset[i] = bds;
hset[i] = hds;
}

final RTimer timer = new RTimer();

@@ -97,7 +93,6 @@ public class DocSetPerf {
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);
FixedBitSet bs1=bs;
BitDocSet bds1=bds;
HashDocSet hds1=hds;
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);

FixedBitSet res = bs1.clone();

@@ -106,17 +101,6 @@ public class DocSetPerf {

test(bds1.intersection(bds).size() == icount);
test(bds1.intersectionSize(bds) == icount);
if (bds1.intersection(hds).size() != icount) {
DocSet ds = bds1.intersection(hds);
System.out.println("STOP");
}

test(bds1.intersection(hds).size() == icount);
test(bds1.intersectionSize(hds) == icount);
test(hds1.intersection(bds).size() == icount);
test(hds1.intersectionSize(bds) == icount);
test(hds1.intersection(hds).size() == icount);
test(hds1.intersectionSize(hds) == icount);

ret += icount;
}

@@ -126,7 +110,6 @@ public class DocSetPerf {
String oper=null;

if (test.endsWith("B")) { type="B"; }
if (test.endsWith("H")) { type="H"; }
if (test.endsWith("M")) { type="M"; }
if (test.startsWith("intersect")) oper="intersect";
if (test.startsWith("intersectSize")) oper="intersectSize";

@@ -141,14 +124,10 @@ public class DocSetPerf {

if (type=="B") {
a=bset[idx1]; b=bset[idx2];
} else if (type=="H") {
a=hset[idx1]; b=bset[idx2];
} else if (type=="M") {
if (idx1 < idx2) {
a=bset[idx1];
b=hset[idx2];
} else {
a=hset[idx1];
b=bset[idx2];
}
}
@@ -50,7 +50,6 @@ import org.apache.solr.SolrTestCase;
*/
public class TestDocSet extends SolrTestCase {
Random rand;
float loadfactor;

@Override
public void setUp() throws Exception {

@@ -89,15 +88,6 @@ public class TestDocSet extends SolrTestCase {
return bs;
}

public DocSet getHashDocSet(FixedBitSet bs) {
int[] docs = new int[bs.cardinality()];
BitSetIterator iter = new BitSetIterator(bs, 0);
for (int i=0; i<docs.length; i++) {
docs[i] = iter.nextDoc();
}
return new HashDocSet(docs,0,docs.length);
}

public DocSet getIntDocSet(FixedBitSet bs) {
int[] docs = new int[bs.cardinality()];
BitSetIterator iter = new BitSetIterator(bs, 0);

@@ -130,11 +120,7 @@ public class TestDocSet extends SolrTestCase {

public DocSet getDocSet(FixedBitSet bs) {
switch(rand.nextInt(9)) {
case 0: return getHashDocSet(bs);

case 1: return getBitDocSet(bs);
case 2: return getBitDocSet(bs);
case 3: return getBitDocSet(bs);
case 0: case 1: case 2: case 3: return getBitDocSet(bs);

case 4: return getIntDocSet(bs);
case 5: return getIntDocSet(bs);

@@ -153,8 +139,6 @@ public class TestDocSet extends SolrTestCase {
}

public void iter(DocSet d1, DocSet d2) {
// HashDocSet and DocList doesn't iterate in order.
if (d1 instanceof HashDocSet || d2 instanceof HashDocSet || d1 instanceof DocList || d2 instanceof DocList) return;

DocIterator i1 = d1.iterator();
DocIterator i2 = d2.iterator();

@@ -235,9 +219,6 @@ public class TestDocSet extends SolrTestCase {
if (smallSetType ==0) {
Arrays.sort(a);
return new SortedIntDocSet(a);
} else if (smallSetType ==1) {
Arrays.sort(a);
return loadfactor!=0 ? new HashDocSet(a,0,n,1/loadfactor) : new HashDocSet(a,0,n);
}
}

@@ -258,41 +239,11 @@ public class TestDocSet extends SolrTestCase {
return sets;
}

/* needs code insertion into HashDocSet
public void testCollisions() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=256;
int iter=1;
int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
int ret=0;
long start=System.currentTimeMillis();
for (int maxDoc : maxDocs) {
int cstart = HashDocSet.collisions;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
for (DocSet s1 : sets) {
for (DocSet s2 : sets) {
if (s1!=s2) ret += s1.intersectionSize(s2);
}
}
int cend = HashDocSet.collisions;
System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
}
long end=System.currentTimeMillis();
System.out.println("testIntersectionSizePerformance="+(end-start)+" ms");
if (ret==-1)System.out.println("wow!");
System.out.println("collisions="+HashDocSet.collisions);

}
***/

public static int smallSetType = 0; // 0==sortedint, 1==hash, 2==FixedBitSet
public static int smallSetType = 0; // 0==sortedint, 2==FixedBitSet
public static int smallSetCuttoff=3000;

/*
public void testIntersectionSizePerformance() {
loadfactor=.75f; // for HashDocSet
rand=new Random(1); // make deterministic

int minBigSetSize=1,maxBigSetSize=30000;

@@ -323,56 +274,6 @@ public class TestDocSet extends SolrTestCase {
}
***/

/*
public void testExistsPerformance() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=512;
int iter=1;
int maxDoc=1000000;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
int ret=0;
long start=System.currentTimeMillis();
for (int i=0; i<iter; i++) {
for (DocSet s1 : sets) {
for (int j=0; j<maxDoc; j++) {
ret += s1.exists(j) ? 1 :0;
}
}
}
long end=System.currentTimeMillis();
System.out.println("testExistsSizePerformance="+(end-start)+" ms");
if (ret==-1)System.out.println("wow!");
}
***/

/* needs code insertion into HashDocSet
public void testExistsCollisions() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=512;
int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
int ret=0;

for (int maxDoc : maxDocs) {
int mask = (BitUtil.nextHighestPowerOfTwo(maxDoc)>>1)-1;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
int cstart = HashDocSet.collisions;
for (DocSet s1 : sets) {
for (int j=0; j<maxDocs[0]; j++) {
int idx = rand.nextInt()&mask;
ret += s1.exists(idx) ? 1 :0;
}
}
int cend = HashDocSet.collisions;
System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
}
if (ret==-1)System.out.println("wow!");
System.out.println("collisions="+HashDocSet.collisions);
}
***/

public LeafReader dummyIndexReader(final int maxDoc) {
return new LeafReader() {
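A closing note on the other recurring pattern above (see the JoinQuery hunks): with addAllTo now taking a FixedBitSet rather than a DocSet, accumulation happens on one mutable bitset which is wrapped in a BitDocSet only at the end. A sketch under stated assumptions (the helper name and maxDoc parameter are hypothetical; addAllTo(FixedBitSet) and the BitDocSet(FixedBitSet) constructor are from the patch, and the target must be sized to cover every doc id):

    import java.util.List;
    import org.apache.lucene.util.FixedBitSet;
    import org.apache.solr.search.BitDocSet;
    import org.apache.solr.search.DocSet;

    class UnionAccumulationSketch {
      // Unions many DocSets by accumulating into a single FixedBitSet.
      static DocSet unionAll(List<DocSet> sets, int maxDoc) {
        FixedBitSet resultBits = new FixedBitSet(maxDoc); // large enough for all docs
        for (DocSet set : sets) {
          set.addAllTo(resultBits); // each impl sets the bits for its own doc ids
        }
        return new BitDocSet(resultBits); // treated as immutable from here on
      }
    }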