mirror of https://github.com/apache/lucene.git
LUCENE-584: Changed Filter API to return a DocIdSet instead of a java.util.BitSet. This allows using more efficient data structures for Filters and makes them more flexible.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@617859 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1caf5cb9ce
commit
18b61286fa
12
CHANGES.txt
12
CHANGES.txt
|
@ -15,12 +15,16 @@ API Changes
|
||||||
2. LUCENE-1150: Re-expose StandardTokenizer's constants publicly;
|
2. LUCENE-1150: Re-expose StandardTokenizer's constants publicly;
|
||||||
this was accidentally lost with LUCENE-966. (Nicolas Lalevée via
|
this was accidentally lost with LUCENE-966. (Nicolas Lalevée via
|
||||||
Mike McCandless)
|
Mike McCandless)
|
||||||
|
|
||||||
|
3. LUCENE-584: Changed Filter API to return a DocIdSet instead of a
|
||||||
|
java.util.BitSet. This allows using more efficient data structures
|
||||||
|
for Filters and makes them more flexible. (Paul Elschot, Michael Busch)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
New features
|
New features
|
||||||
|
|
||||||
1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis
|
1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis
|
||||||
process. The flag is not indexed/stored and is thus only used by analysis.
|
process. The flag is not indexed/stored and is thus only used by analysis.
|
||||||
|
|
||||||
2. LUCENE-1147: Add -segment option to CheckIndex tool so you can
|
2. LUCENE-1147: Add -segment option to CheckIndex tool so you can
|
||||||
|
@ -28,6 +32,12 @@ New features
|
||||||
McCandless)
|
McCandless)
|
||||||
|
|
||||||
3. LUCENE-1045: Reopened this issue to add support for short and bytes.
|
3. LUCENE-1045: Reopened this issue to add support for short and bytes.
|
||||||
|
|
||||||
|
4. LUCENE-584: Added new data structures to o.a.l.util, such as
|
||||||
|
OpenBitSet and SortedVIntList. These extend DocIdSet and can
|
||||||
|
directly be used for Filters with the new Filter API. Also changed
|
||||||
|
the core Filters to use OpenBitSet instead of java.util.BitSet.
|
||||||
|
(Paul Elschot, Michael Busch)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
|
|
|
@ -37,8 +37,8 @@ public class ChainedFilterTest extends TestCase {
|
||||||
private Query query;
|
private Query query;
|
||||||
// private DateFilter dateFilter; DateFilter was deprecated and removed
|
// private DateFilter dateFilter; DateFilter was deprecated and removed
|
||||||
private RangeFilter dateFilter;
|
private RangeFilter dateFilter;
|
||||||
private QueryFilter bobFilter;
|
private QueryWrapperFilter bobFilter;
|
||||||
private QueryFilter sueFilter;
|
private QueryWrapperFilter sueFilter;
|
||||||
|
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
directory = new RAMDirectory();
|
directory = new RAMDirectory();
|
||||||
|
@ -74,9 +74,9 @@ public class ChainedFilterTest extends TestCase {
|
||||||
// just treat dates as strings and select the whole range for now...
|
// just treat dates as strings and select the whole range for now...
|
||||||
dateFilter = new RangeFilter("date","","ZZZZ",true,true);
|
dateFilter = new RangeFilter("date","","ZZZZ",true,true);
|
||||||
|
|
||||||
bobFilter = new QueryFilter(
|
bobFilter = new QueryWrapperFilter(
|
||||||
new TermQuery(new Term("owner", "bob")));
|
new TermQuery(new Term("owner", "bob")));
|
||||||
sueFilter = new QueryFilter(
|
sueFilter = new QueryWrapperFilter(
|
||||||
new TermQuery(new Term("owner", "sue")));
|
new TermQuery(new Term("owner", "sue")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ import java.util.Map.Entry;
|
||||||
import org.apache.lucene.search.CachingWrapperFilter;
|
import org.apache.lucene.search.CachingWrapperFilter;
|
||||||
import org.apache.lucene.search.Filter;
|
import org.apache.lucene.search.Filter;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryFilter;
|
import org.apache.lucene.search.QueryWrapperFilter;
|
||||||
import org.apache.lucene.xmlparser.DOMUtils;
|
import org.apache.lucene.xmlparser.DOMUtils;
|
||||||
import org.apache.lucene.xmlparser.FilterBuilder;
|
import org.apache.lucene.xmlparser.FilterBuilder;
|
||||||
import org.apache.lucene.xmlparser.FilterBuilderFactory;
|
import org.apache.lucene.xmlparser.FilterBuilderFactory;
|
||||||
|
@ -105,7 +105,7 @@ public class CachedFilterBuilder implements FilterBuilder {
|
||||||
//cache miss
|
//cache miss
|
||||||
if (qb != null)
|
if (qb != null)
|
||||||
{
|
{
|
||||||
cachedFilter = new QueryFilter(q);
|
cachedFilter = new QueryWrapperFilter(q);
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
cachedFilter = new CachingWrapperFilter(f);
|
cachedFilter = new CachingWrapperFilter(f);
|
||||||
|
|
|
@ -43,11 +43,19 @@ public class CachingSpanFilter extends SpanFilter {
|
||||||
this.filter = filter;
|
this.filter = filter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||||
|
*/
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public BitSet bits(IndexReader reader) throws IOException {
|
||||||
SpanFilterResult result = getCachedResult(reader);
|
SpanFilterResult result = getCachedResult(reader);
|
||||||
return result != null ? result.getBits() : null;
|
return result != null ? result.getBits() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
SpanFilterResult result = getCachedResult(reader);
|
||||||
|
return result != null ? result.getDocIdSet() : null;
|
||||||
|
}
|
||||||
|
|
||||||
private SpanFilterResult getCachedResult(IndexReader reader) throws IOException {
|
private SpanFilterResult getCachedResult(IndexReader reader) throws IOException {
|
||||||
SpanFilterResult result = null;
|
SpanFilterResult result = null;
|
||||||
if (cache == null) {
|
if (cache == null) {
|
||||||
|
|
|
@ -43,6 +43,9 @@ public class CachingWrapperFilter extends Filter {
|
||||||
this.filter = filter;
|
this.filter = filter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||||
|
*/
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public BitSet bits(IndexReader reader) throws IOException {
|
||||||
if (cache == null) {
|
if (cache == null) {
|
||||||
cache = new WeakHashMap();
|
cache = new WeakHashMap();
|
||||||
|
@ -63,6 +66,28 @@ public class CachingWrapperFilter extends Filter {
|
||||||
|
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
if (cache == null) {
|
||||||
|
cache = new WeakHashMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized (cache) { // check cache
|
||||||
|
DocIdSet cached = (DocIdSet) cache.get(reader);
|
||||||
|
if (cached != null) {
|
||||||
|
return cached;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final DocIdSet docIdSet = filter.getDocIdSet(reader);
|
||||||
|
|
||||||
|
synchronized (cache) { // update cache
|
||||||
|
cache.put(reader, docIdSet);
|
||||||
|
}
|
||||||
|
|
||||||
|
return docIdSet;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "CachingWrapperFilter("+filter+")";
|
return "CachingWrapperFilter("+filter+")";
|
||||||
|
|
|
@ -85,7 +85,7 @@ public class ConstantScoreQuery extends Query {
|
||||||
public Explanation explain(IndexReader reader, int doc) throws IOException {
|
public Explanation explain(IndexReader reader, int doc) throws IOException {
|
||||||
|
|
||||||
ConstantScorer cs = (ConstantScorer)scorer(reader);
|
ConstantScorer cs = (ConstantScorer)scorer(reader);
|
||||||
boolean exists = cs.bits.get(doc);
|
boolean exists = cs.docIdSetIterator.skipTo(doc) && (cs.docIdSetIterator.doc() == doc);
|
||||||
|
|
||||||
ComplexExplanation result = new ComplexExplanation();
|
ComplexExplanation result = new ComplexExplanation();
|
||||||
|
|
||||||
|
@ -107,23 +107,22 @@ public class ConstantScoreQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected class ConstantScorer extends Scorer {
|
protected class ConstantScorer extends Scorer {
|
||||||
final BitSet bits;
|
final DocIdSetIterator docIdSetIterator;
|
||||||
final float theScore;
|
final float theScore;
|
||||||
int doc=-1;
|
int doc=-1;
|
||||||
|
|
||||||
public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException {
|
public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException {
|
||||||
super(similarity);
|
super(similarity);
|
||||||
theScore = w.getValue();
|
theScore = w.getValue();
|
||||||
bits = filter.bits(reader);
|
docIdSetIterator = filter.getDocIdSet(reader).iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean next() throws IOException {
|
public boolean next() throws IOException {
|
||||||
doc = bits.nextSetBit(doc+1);
|
return docIdSetIterator.next();
|
||||||
return doc >= 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int doc() {
|
public int doc() {
|
||||||
return doc;
|
return docIdSetIterator.doc();
|
||||||
}
|
}
|
||||||
|
|
||||||
public float score() throws IOException {
|
public float score() throws IOException {
|
||||||
|
@ -131,8 +130,7 @@ public class ConstantScoreQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean skipTo(int target) throws IOException {
|
public boolean skipTo(int target) throws IOException {
|
||||||
doc = bits.nextSetBit(target); // requires JDK 1.4
|
return docIdSetIterator.skipTo(target);
|
||||||
return doc >= 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Explanation explain(int doc) throws IOException {
|
public Explanation explain(int doc) throws IOException {
|
||||||
|
@ -170,3 +168,4 @@ public class ConstantScoreQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A DocIdSet contains a set of doc ids. Implementing classes must provide
|
||||||
|
* a {@link DocIdSetIterator} to access the set.
|
||||||
|
*/
|
||||||
|
public abstract class DocIdSet {
|
||||||
|
public abstract DocIdSetIterator iterator();
|
||||||
|
}
|
|
@ -0,0 +1,49 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This abstract class defines methods to iterate over a set of
|
||||||
|
* non-decreasing doc ids.
|
||||||
|
*/
|
||||||
|
public abstract class DocIdSetIterator {
|
||||||
|
/** Returns the current document number. <p> This is invalid until {@link
|
||||||
|
#next()} or {@link #skipTo(int)} is called for the first time.*/
|
||||||
|
public abstract int doc();
|
||||||
|
|
||||||
|
/** Moves to the next docId in the set. Returns true iff
|
||||||
|
* there is such a docId. */
|
||||||
|
public abstract boolean next() throws IOException;
|
||||||
|
|
||||||
|
/** Skips entries to the first beyond the current whose document number is
|
||||||
|
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
|
||||||
|
* an entry. <p>Behaves as if written: <pre>
|
||||||
|
* boolean skipTo(int target) {
|
||||||
|
* do {
|
||||||
|
* if (!next())
|
||||||
|
* return false;
|
||||||
|
* } while (target > doc());
|
||||||
|
* return true;
|
||||||
|
* }
|
||||||
|
* </pre>
|
||||||
|
* Some implementations are considerably more efficient than that.
|
||||||
|
*/
|
||||||
|
public abstract boolean skipTo(int target) throws IOException;
|
||||||
|
}
|
|
@ -20,11 +20,32 @@ package org.apache.lucene.search;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
|
|
||||||
/** Abstract base class providing a mechanism to restrict searches to a subset
|
/** Abstract base class providing a mechanism to use a subset of an index
|
||||||
of an index. */
|
* for restriction or permission of index search results.
|
||||||
|
* <p>
|
||||||
|
* <b>Note:</b> In Lucene 3.0 {@link #bits(IndexReader)} will be removed
|
||||||
|
* and {@link #getDocIdSet(IndexReader)} will be defined as abstract.
|
||||||
|
* All implementing classes must therefore implement {@link #getDocIdSet(IndexReader)}
|
||||||
|
* in order to work with Lucene 3.0.
|
||||||
|
*/
|
||||||
public abstract class Filter implements java.io.Serializable {
|
public abstract class Filter implements java.io.Serializable {
|
||||||
/** Returns a BitSet with true for documents which should be permitted in
|
/**
|
||||||
search results, and false for those that should not. */
|
* @return A BitSet with true for documents which should be permitted in
|
||||||
public abstract BitSet bits(IndexReader reader) throws IOException;
|
* search results, and false for those that should not.
|
||||||
|
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||||
|
*/
|
||||||
|
public BitSet bits(IndexReader reader) throws IOException {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return a DocIdSet that provides the documents which should be
|
||||||
|
* permitted or prohibited in search results.
|
||||||
|
* @see DocIdBitSet
|
||||||
|
*/
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
return new DocIdBitSet(bits(reader));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,6 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.BitSet;
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
|
||||||
|
@ -47,7 +46,7 @@ extends Query {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new query which applies a filter to the results of the original query.
|
* Constructs a new query which applies a filter to the results of the original query.
|
||||||
* Filter.bits() will be called every time this query is used in a search.
|
* Filter.getDocIdSet() will be called every time this query is used in a search.
|
||||||
* @param query Query to be filtered, cannot be <code>null</code>.
|
* @param query Query to be filtered, cannot be <code>null</code>.
|
||||||
* @param filter Filter to apply to query results, cannot be <code>null</code>.
|
* @param filter Filter to apply to query results, cannot be <code>null</code>.
|
||||||
*/
|
*/
|
||||||
|
@ -86,13 +85,15 @@ extends Query {
|
||||||
inner.addDetail(preBoost);
|
inner.addDetail(preBoost);
|
||||||
}
|
}
|
||||||
Filter f = FilteredQuery.this.filter;
|
Filter f = FilteredQuery.this.filter;
|
||||||
BitSet matches = f.bits(ir);
|
DocIdSetIterator docIdSetIterator = f.getDocIdSet(ir).iterator();
|
||||||
if (matches.get(i))
|
if (docIdSetIterator.skipTo(i) && (docIdSetIterator.doc() == i)) {
|
||||||
return inner;
|
return inner;
|
||||||
Explanation result = new Explanation
|
} else {
|
||||||
(0.0f, "failure to match filter: " + f.toString());
|
Explanation result = new Explanation
|
||||||
result.addDetail(inner);
|
(0.0f, "failure to match filter: " + f.toString());
|
||||||
return result;
|
result.addDetail(inner);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// return this query
|
// return this query
|
||||||
|
@ -100,50 +101,49 @@ extends Query {
|
||||||
|
|
||||||
// return a filtering scorer
|
// return a filtering scorer
|
||||||
public Scorer scorer (IndexReader indexReader) throws IOException {
|
public Scorer scorer (IndexReader indexReader) throws IOException {
|
||||||
final Scorer scorer = weight.scorer (indexReader);
|
final Scorer scorer = weight.scorer(indexReader);
|
||||||
final BitSet bitset = filter.bits (indexReader);
|
final DocIdSetIterator docIdSetIterator = filter.getDocIdSet(indexReader).iterator();
|
||||||
return new Scorer (similarity) {
|
|
||||||
|
|
||||||
public boolean next() throws IOException {
|
return new Scorer(similarity) {
|
||||||
do {
|
|
||||||
if (! scorer.next()) {
|
private boolean advanceToCommon() throws IOException {
|
||||||
|
while (scorer.doc() != docIdSetIterator.doc()) {
|
||||||
|
if (scorer.doc() < docIdSetIterator.doc()) {
|
||||||
|
if (!scorer.skipTo(docIdSetIterator.doc())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else if (!docIdSetIterator.skipTo(scorer.doc())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} while (! bitset.get(scorer.doc()));
|
}
|
||||||
/* When skipTo() is allowed on scorer it should be used here
|
|
||||||
* in combination with bitset.nextSetBit(...)
|
|
||||||
* See the while loop in skipTo() below.
|
|
||||||
*/
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
return docIdSetIterator.next() && scorer.next() && advanceToCommon();
|
||||||
|
}
|
||||||
|
|
||||||
public int doc() { return scorer.doc(); }
|
public int doc() { return scorer.doc(); }
|
||||||
|
|
||||||
public boolean skipTo(int i) throws IOException {
|
public boolean skipTo(int i) throws IOException {
|
||||||
if (! scorer.skipTo(i)) {
|
return docIdSetIterator.skipTo(i)
|
||||||
return false;
|
&& scorer.skipTo(docIdSetIterator.doc())
|
||||||
}
|
&& advanceToCommon();
|
||||||
while (! bitset.get(scorer.doc())) {
|
}
|
||||||
int nextFiltered = bitset.nextSetBit(scorer.doc() + 1);
|
|
||||||
if (nextFiltered == -1) {
|
|
||||||
return false;
|
|
||||||
} else if (! scorer.skipTo(nextFiltered)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public float score() throws IOException { return getBoost() * scorer.score(); }
|
public float score() throws IOException { return getBoost() * scorer.score(); }
|
||||||
|
|
||||||
// add an explanation about whether the document was filtered
|
// add an explanation about whether the document was filtered
|
||||||
public Explanation explain (int i) throws IOException {
|
public Explanation explain (int i) throws IOException {
|
||||||
Explanation exp = scorer.explain (i);
|
Explanation exp = scorer.explain(i);
|
||||||
exp.setValue(getBoost() * exp.getValue());
|
|
||||||
|
|
||||||
if (bitset.get(i))
|
if (docIdSetIterator.skipTo(i) && (docIdSetIterator.doc() == i)) {
|
||||||
exp.setDescription ("allowed by filter: "+exp.getDescription());
|
exp.setDescription ("allowed by filter: "+exp.getDescription());
|
||||||
else
|
exp.setValue(getBoost() * exp.getValue());
|
||||||
|
} else {
|
||||||
exp.setDescription ("removed by filter: "+exp.getDescription());
|
exp.setDescription ("removed by filter: "+exp.getDescription());
|
||||||
|
exp.setValue(0.0f);
|
||||||
|
}
|
||||||
return exp;
|
return exp;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -128,22 +128,33 @@ public class IndexSearcher extends Searcher {
|
||||||
// inherit javadoc
|
// inherit javadoc
|
||||||
public void search(Weight weight, Filter filter,
|
public void search(Weight weight, Filter filter,
|
||||||
final HitCollector results) throws IOException {
|
final HitCollector results) throws IOException {
|
||||||
HitCollector collector = results;
|
|
||||||
if (filter != null) {
|
|
||||||
final BitSet bits = filter.bits(reader);
|
|
||||||
collector = new HitCollector() {
|
|
||||||
public final void collect(int doc, float score) {
|
|
||||||
if (bits.get(doc)) { // skip docs not in bits
|
|
||||||
results.collect(doc, score);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
Scorer scorer = weight.scorer(reader);
|
Scorer scorer = weight.scorer(reader);
|
||||||
if (scorer == null)
|
if (scorer == null)
|
||||||
return;
|
return;
|
||||||
scorer.score(collector);
|
|
||||||
|
if (filter == null) {
|
||||||
|
scorer.score(results);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
DocIdSetIterator docIdSetIterator = filter.getDocIdSet(reader).iterator(); // CHECKME: use ConjunctionScorer here?
|
||||||
|
boolean more = docIdSetIterator.next();
|
||||||
|
while (more) {
|
||||||
|
int filterDocId = docIdSetIterator.doc();
|
||||||
|
if (! scorer.skipTo(filterDocId)) {
|
||||||
|
more = false;
|
||||||
|
} else {
|
||||||
|
int scorerDocId = scorer.doc();
|
||||||
|
if (scorerDocId == filterDocId) { // permitted by filter
|
||||||
|
results.collect(scorerDocId, scorer.score());
|
||||||
|
more = docIdSetIterator.skipTo(scorerDocId + 1);
|
||||||
|
} else {
|
||||||
|
more = docIdSetIterator.skipTo(scorerDocId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Query rewrite(Query original) throws IOException {
|
public Query rewrite(Query original) throws IOException {
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.search.Filter;
|
import org.apache.lucene.search.Filter;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.TermEnum;
|
import org.apache.lucene.index.TermEnum;
|
||||||
|
@ -39,6 +40,9 @@ public class PrefixFilter extends Filter {
|
||||||
|
|
||||||
public Term getPrefix() { return prefix; }
|
public Term getPrefix() { return prefix; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||||
|
*/
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public BitSet bits(IndexReader reader) throws IOException {
|
||||||
final BitSet bitSet = new BitSet(reader.maxDoc());
|
final BitSet bitSet = new BitSet(reader.maxDoc());
|
||||||
new PrefixGenerator(prefix) {
|
new PrefixGenerator(prefix) {
|
||||||
|
@ -48,6 +52,16 @@ public class PrefixFilter extends Filter {
|
||||||
}.generate(reader);
|
}.generate(reader);
|
||||||
return bitSet;
|
return bitSet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
|
||||||
|
new PrefixGenerator(prefix) {
|
||||||
|
public void handleDoc(int doc) {
|
||||||
|
bitSet.set(doc);
|
||||||
|
}
|
||||||
|
}.generate(reader);
|
||||||
|
return bitSet;
|
||||||
|
}
|
||||||
|
|
||||||
/** Prints a user-readable version of this query. */
|
/** Prints a user-readable version of this query. */
|
||||||
public String toString () {
|
public String toString () {
|
||||||
|
@ -105,3 +119,4 @@ abstract class PrefixGenerator implements IdGenerator {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constrains search results to only match those which also match a provided
|
* Constrains search results to only match those which also match a provided
|
||||||
|
@ -44,6 +45,9 @@ public class QueryWrapperFilter extends Filter {
|
||||||
this.query = query;
|
this.query = query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||||
|
*/
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public BitSet bits(IndexReader reader) throws IOException {
|
||||||
final BitSet bits = new BitSet(reader.maxDoc());
|
final BitSet bits = new BitSet(reader.maxDoc());
|
||||||
|
|
||||||
|
@ -54,6 +58,17 @@ public class QueryWrapperFilter extends Filter {
|
||||||
});
|
});
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||||
|
|
||||||
|
new IndexSearcher(reader).search(query, new HitCollector() {
|
||||||
|
public final void collect(int doc, float score) {
|
||||||
|
bits.set(doc); // set bit for hit
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return bits;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "QueryWrapperFilter(" + query + ")";
|
return "QueryWrapperFilter(" + query + ")";
|
||||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermDocs;
|
import org.apache.lucene.index.TermDocs;
|
||||||
import org.apache.lucene.index.TermEnum;
|
import org.apache.lucene.index.TermEnum;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
@ -94,6 +95,7 @@ public class RangeFilter extends Filter {
|
||||||
* Returns a BitSet with true for documents which should be
|
* Returns a BitSet with true for documents which should be
|
||||||
* permitted in search results, and false for those that should
|
* permitted in search results, and false for those that should
|
||||||
* not.
|
* not.
|
||||||
|
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||||
*/
|
*/
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public BitSet bits(IndexReader reader) throws IOException {
|
||||||
BitSet bits = new BitSet(reader.maxDoc());
|
BitSet bits = new BitSet(reader.maxDoc());
|
||||||
|
@ -152,6 +154,68 @@ public class RangeFilter extends Filter {
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a DocIdSet with documents that should be
|
||||||
|
* permitted in search results.
|
||||||
|
*/
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||||
|
|
||||||
|
TermEnum enumerator =
|
||||||
|
(null != lowerTerm
|
||||||
|
? reader.terms(new Term(fieldName, lowerTerm))
|
||||||
|
: reader.terms(new Term(fieldName,"")));
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
if (enumerator.term() == null) {
|
||||||
|
return bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean checkLower = false;
|
||||||
|
if (!includeLower) // make adjustments to set to exclusive
|
||||||
|
checkLower = true;
|
||||||
|
|
||||||
|
TermDocs termDocs = reader.termDocs();
|
||||||
|
try {
|
||||||
|
|
||||||
|
do {
|
||||||
|
Term term = enumerator.term();
|
||||||
|
if (term != null && term.field().equals(fieldName)) {
|
||||||
|
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
|
||||||
|
checkLower = false;
|
||||||
|
if (upperTerm != null) {
|
||||||
|
int compare = upperTerm.compareTo(term.text());
|
||||||
|
/* if beyond the upper term, or is exclusive and
|
||||||
|
* this is equal to the upper term, break out */
|
||||||
|
if ((compare < 0) ||
|
||||||
|
(!includeUpper && compare==0)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* we have a good term, find the docs */
|
||||||
|
|
||||||
|
termDocs.seek(enumerator.term());
|
||||||
|
while (termDocs.next()) {
|
||||||
|
bits.set(termDocs.doc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (enumerator.next());
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
termDocs.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
enumerator.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
return bits;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuffer buffer = new StringBuffer();
|
StringBuffer buffer = new StringBuffer();
|
||||||
buffer.append(fieldName);
|
buffer.append(fieldName);
|
||||||
|
|
|
@ -50,9 +50,21 @@ public class RemoteCachingWrapperFilter extends Filter {
|
||||||
* searcher side of a remote connection.
|
* searcher side of a remote connection.
|
||||||
* @param reader the index reader for the Filter
|
* @param reader the index reader for the Filter
|
||||||
* @return the bitset
|
* @return the bitset
|
||||||
|
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||||
*/
|
*/
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public BitSet bits(IndexReader reader) throws IOException {
|
||||||
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
|
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
|
||||||
return cachedFilter.bits(reader);
|
return cachedFilter.bits(reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uses the {@link FilterManager} to keep the cache for a filter on the
|
||||||
|
* searcher side of a remote connection.
|
||||||
|
* @param reader the index reader for the Filter
|
||||||
|
* @return the DocIdSet
|
||||||
|
*/
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
|
||||||
|
return cachedFilter.getDocIdSet(reader);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,7 +33,7 @@ import java.io.IOException;
|
||||||
* </p>
|
* </p>
|
||||||
* @see BooleanQuery#setAllowDocsOutOfOrder
|
* @see BooleanQuery#setAllowDocsOutOfOrder
|
||||||
*/
|
*/
|
||||||
public abstract class Scorer {
|
public abstract class Scorer extends DocIdSetIterator {
|
||||||
private Similarity similarity;
|
private Similarity similarity;
|
||||||
|
|
||||||
/** Constructs a Scorer.
|
/** Constructs a Scorer.
|
||||||
|
@ -76,65 +76,12 @@ public abstract class Scorer {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Advances to the document matching this Scorer with the lowest doc Id
|
|
||||||
* greater than the current value of {@link #doc()} (or to the matching
|
|
||||||
* document with the lowest doc Id if next has never been called on
|
|
||||||
* this Scorer).
|
|
||||||
*
|
|
||||||
* <p>
|
|
||||||
* When this method is used the {@link #explain(int)} method should not
|
|
||||||
* be used.
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* @return true iff there is another document matching the query.
|
|
||||||
* @see BooleanQuery#setAllowDocsOutOfOrder
|
|
||||||
*/
|
|
||||||
public abstract boolean next() throws IOException;
|
|
||||||
|
|
||||||
/** Returns the current document number matching the query.
|
|
||||||
* Initially invalid, until {@link #next()} is called the first time.
|
|
||||||
*/
|
|
||||||
public abstract int doc();
|
|
||||||
|
|
||||||
/** Returns the score of the current document matching the query.
|
/** Returns the score of the current document matching the query.
|
||||||
* Initially invalid, until {@link #next()} or {@link #skipTo(int)}
|
* Initially invalid, until {@link #next()} or {@link #skipTo(int)}
|
||||||
* is called the first time.
|
* is called the first time.
|
||||||
*/
|
*/
|
||||||
public abstract float score() throws IOException;
|
public abstract float score() throws IOException;
|
||||||
|
|
||||||
/**
|
|
||||||
* Skips to the document matching this Scorer with the lowest doc Id
|
|
||||||
* greater than or equal to a given target.
|
|
||||||
*
|
|
||||||
* <p>
|
|
||||||
* The behavior of this method is undefined if the target specified is
|
|
||||||
* less than or equal to the current value of {@link #doc()}.
|
|
||||||
* <p>
|
|
||||||
* Behaves as if written:
|
|
||||||
* <pre>
|
|
||||||
* boolean skipTo(int target) {
|
|
||||||
* do {
|
|
||||||
* if (!next())
|
|
||||||
* return false;
|
|
||||||
* } while (target > doc());
|
|
||||||
* return true;
|
|
||||||
* }
|
|
||||||
* </pre>
|
|
||||||
* Most implementations are considerably more efficient than that.
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* <p>
|
|
||||||
* When this method is used the {@link #explain(int)} method should not
|
|
||||||
* be used.
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* @param target The target document number.
|
|
||||||
* @return true iff there is such a match.
|
|
||||||
* @see BooleanQuery#setAllowDocsOutOfOrder
|
|
||||||
*/
|
|
||||||
public abstract boolean skipTo(int target) throws IOException;
|
|
||||||
|
|
||||||
/** Returns an explanation of the score for a document.
|
/** Returns an explanation of the score for a document.
|
||||||
* <br>When this method is used, the {@link #next()}, {@link #skipTo(int)} and
|
* <br>When this method is used, the {@link #next()}, {@link #skipTo(int)} and
|
||||||
* {@link #score(HitCollector)} methods should not be used.
|
* {@link #score(HitCollector)} methods should not be used.
|
||||||
|
|
|
@ -48,7 +48,7 @@ public interface Searchable extends java.rmi.Remote {
|
||||||
* non-high-scoring hits.
|
* non-high-scoring hits.
|
||||||
*
|
*
|
||||||
* @param weight to match documents
|
* @param weight to match documents
|
||||||
* @param filter if non-null, a bitset used to eliminate some documents
|
* @param filter if non-null, used to permit documents to be collected.
|
||||||
* @param results to receive hits
|
* @param results to receive hits
|
||||||
* @throws BooleanQuery.TooManyClauses
|
* @throws BooleanQuery.TooManyClauses
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -109,7 +109,7 @@ public abstract class Searcher implements Searchable {
|
||||||
* non-high-scoring hits.
|
* non-high-scoring hits.
|
||||||
*
|
*
|
||||||
* @param query to match documents
|
* @param query to match documents
|
||||||
* @param filter if non-null, a bitset used to eliminate some documents
|
* @param filter if non-null, used to permit documents to be collected.
|
||||||
* @param results to receive hits
|
* @param results to receive hits
|
||||||
* @throws BooleanQuery.TooManyClauses
|
* @throws BooleanQuery.TooManyClauses
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -30,7 +30,7 @@ import java.io.IOException;
|
||||||
public abstract class SpanFilter extends Filter{
|
public abstract class SpanFilter extends Filter{
|
||||||
/** Returns a SpanFilterResult with true for documents which should be permitted in
|
/** Returns a SpanFilterResult with true for documents which should be permitted in
|
||||||
search results, and false for those that should not and Spans for where the true docs match.
|
search results, and false for those that should not and Spans for where the true docs match.
|
||||||
* @param reader The {@link org.apache.lucene.index.IndexReader} to load position and bitset information from
|
* @param reader The {@link org.apache.lucene.index.IndexReader} to load position and DocIdSet information from
|
||||||
* @return A {@link SpanFilterResult}
|
* @return A {@link SpanFilterResult}
|
||||||
* @throws java.io.IOException if there was an issue accessing the necessary information
|
* @throws java.io.IOException if there was an issue accessing the necessary information
|
||||||
* */
|
* */
|
||||||
|
|
|
@ -28,19 +28,33 @@ import java.util.List;
|
||||||
*
|
*
|
||||||
**/
|
**/
|
||||||
public class SpanFilterResult {
|
public class SpanFilterResult {
|
||||||
|
/** @deprecated */
|
||||||
private BitSet bits;
|
private BitSet bits;
|
||||||
|
|
||||||
|
private DocIdSet docIdSet;
|
||||||
private List positions;//Spans spans;
|
private List positions;//Spans spans;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param bits The bits for the Filter
|
* @param bits The bits for the Filter
|
||||||
* @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
|
* @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
|
||||||
|
* @deprecated Use {@link #SpanFilterResult(DocIdSet, List)} instead
|
||||||
*/
|
*/
|
||||||
public SpanFilterResult(BitSet bits, List positions) {
|
public SpanFilterResult(BitSet bits, List positions) {
|
||||||
this.bits = bits;
|
this.bits = bits;
|
||||||
this.positions = positions;
|
this.positions = positions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param docIdSet The DocIdSet for the Filter
|
||||||
|
* @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
|
||||||
|
*/
|
||||||
|
public SpanFilterResult(DocIdSet docIdSet, List positions) {
|
||||||
|
this.docIdSet = docIdSet;
|
||||||
|
this.positions = positions;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The first entry in the array corresponds to the first "on" bit.
|
* The first entry in the array corresponds to the first "on" bit.
|
||||||
* Entries are increasing by document order
|
* Entries are increasing by document order
|
||||||
|
@ -50,11 +64,17 @@ public class SpanFilterResult {
|
||||||
return positions;
|
return positions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use {@link #getDocIdSet()}
|
||||||
|
*/
|
||||||
public BitSet getBits() {
|
public BitSet getBits() {
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Returns the docIdSet */
|
||||||
|
public DocIdSet getDocIdSet() {
|
||||||
|
return docIdSet;
|
||||||
|
}
|
||||||
|
|
||||||
public static class PositionInfo {
|
public static class PositionInfo {
|
||||||
private int doc;
|
private int doc;
|
||||||
|
@ -115,3 +135,4 @@ public class SpanFilterResult {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.search.spans.SpanQuery;
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.search.spans.Spans;
|
import org.apache.lucene.search.spans.Spans;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -54,15 +55,14 @@ public class SpanQueryFilter extends SpanFilter {
|
||||||
this.query = query;
|
this.query = query;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
SpanFilterResult result = bitSpans(reader);
|
SpanFilterResult result = bitSpans(reader);
|
||||||
return result.getBits();
|
return result.getDocIdSet();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public SpanFilterResult bitSpans(IndexReader reader) throws IOException {
|
public SpanFilterResult bitSpans(IndexReader reader) throws IOException {
|
||||||
|
|
||||||
final BitSet bits = new BitSet(reader.maxDoc());
|
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||||
Spans spans = query.getSpans(reader);
|
Spans spans = query.getSpans(reader);
|
||||||
List tmp = new ArrayList(20);
|
List tmp = new ArrayList(20);
|
||||||
int currentDoc = -1;
|
int currentDoc = -1;
|
||||||
|
|
|
@ -0,0 +1,799 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util; // from org.apache.solr.util rev 555343
|
||||||
|
|
||||||
|
/** A variety of high efficiency bit twiddling routines.
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class BitUtil {
|
||||||
|
|
||||||
|
/** Returns the number of bits set in the long */
|
||||||
|
public static int pop(long x) {
|
||||||
|
/* Hacker's Delight 32 bit pop function:
|
||||||
|
* http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
|
||||||
|
*
|
||||||
|
int pop(unsigned x) {
|
||||||
|
x = x - ((x >> 1) & 0x55555555);
|
||||||
|
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
|
||||||
|
x = (x + (x >> 4)) & 0x0F0F0F0F;
|
||||||
|
x = x + (x >> 8);
|
||||||
|
x = x + (x >> 16);
|
||||||
|
return x & 0x0000003F;
|
||||||
|
}
|
||||||
|
***/
|
||||||
|
|
||||||
|
// 64 bit java version of the C function from above
|
||||||
|
x = x - ((x >>> 1) & 0x5555555555555555L);
|
||||||
|
x = (x & 0x3333333333333333L) + ((x >>>2 ) & 0x3333333333333333L);
|
||||||
|
x = (x + (x >>> 4)) & 0x0F0F0F0F0F0F0F0FL;
|
||||||
|
x = x + (x >>> 8);
|
||||||
|
x = x + (x >>> 16);
|
||||||
|
x = x + (x >>> 32);
|
||||||
|
return ((int)x) & 0x7F;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*** Returns the number of set bits in an array of longs. */
|
||||||
|
public static long pop_array(long A[], int wordOffset, int numWords) {
|
||||||
|
/*
|
||||||
|
* Robert Harley and David Seal's bit counting algorithm, as documented
|
||||||
|
* in the revisions of Hacker's Delight
|
||||||
|
* http://www.hackersdelight.org/revisions.pdf
|
||||||
|
* http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
|
||||||
|
*
|
||||||
|
* This function was adapted to Java, and extended to use 64 bit words.
|
||||||
|
* If only we had access to wider registers like SSE from Java...
|
||||||
|
*
|
||||||
|
* This function can be transformed to compute the popcount of other functions
|
||||||
|
* on bitsets via something like this:
|
||||||
|
* sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g'
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int n = wordOffset+numWords;
|
||||||
|
long tot=0, tot8=0;
|
||||||
|
long ones=0, twos=0, fours=0;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = wordOffset; i <= n - 8; i+=8) {
|
||||||
|
/*** C macro from Hacker's Delight
|
||||||
|
#define CSA(h,l, a,b,c) \
|
||||||
|
{unsigned u = a ^ b; unsigned v = c; \
|
||||||
|
h = (a & b) | (u & v); l = u ^ v;}
|
||||||
|
***/
|
||||||
|
|
||||||
|
long twosA,twosB,foursA,foursB,eights;
|
||||||
|
|
||||||
|
// CSA(twosA, ones, ones, A[i], A[i+1])
|
||||||
|
{
|
||||||
|
long b=A[i], c=A[i+1];
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, A[i+2], A[i+3])
|
||||||
|
{
|
||||||
|
long b=A[i+2], c=A[i+3];
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursA, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
//CSA(twosA, ones, ones, A[i+4], A[i+5])
|
||||||
|
{
|
||||||
|
long b=A[i+4], c=A[i+5];
|
||||||
|
long u=ones^b;
|
||||||
|
twosA=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, A[i+6], A[i+7])
|
||||||
|
{
|
||||||
|
long b=A[i+6], c=A[i+7];
|
||||||
|
long u=ones^b;
|
||||||
|
twosB=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursB, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursB=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
|
||||||
|
//CSA(eights, fours, fours, foursA, foursB)
|
||||||
|
{
|
||||||
|
long u=fours^foursA;
|
||||||
|
eights=(fours&foursA)|(u&foursB);
|
||||||
|
fours=u^foursB;
|
||||||
|
}
|
||||||
|
tot8 += pop(eights);
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle trailing words in a binary-search manner...
|
||||||
|
// derived from the loop above by setting specific elements to 0.
|
||||||
|
// the original method in Hacker's Delight used a simple for loop:
|
||||||
|
// for (i = i; i < n; i++) // Add in the last elements
|
||||||
|
// tot = tot + pop(A[i]);
|
||||||
|
|
||||||
|
if (i<=n-4) {
|
||||||
|
long twosA, twosB, foursA, eights;
|
||||||
|
{
|
||||||
|
long b=A[i], c=A[i+1];
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long b=A[i+2], c=A[i+3];
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<=n-2) {
|
||||||
|
long b=A[i], c=A[i+1];
|
||||||
|
long u=ones ^ b;
|
||||||
|
long twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
|
||||||
|
long foursA=twos&twosA;
|
||||||
|
twos=twos^twosA;
|
||||||
|
|
||||||
|
long eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<n) {
|
||||||
|
tot += pop(A[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
tot += (pop(fours)<<2)
|
||||||
|
+ (pop(twos)<<1)
|
||||||
|
+ pop(ones)
|
||||||
|
+ (tot8<<3);
|
||||||
|
|
||||||
|
return tot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the popcount or cardinality of the two sets after an intersection.
|
||||||
|
* Neither array is modified.
|
||||||
|
*/
|
||||||
|
public static long pop_intersect(long A[], long B[], int wordOffset, int numWords) {
|
||||||
|
// generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g'
|
||||||
|
int n = wordOffset+numWords;
|
||||||
|
long tot=0, tot8=0;
|
||||||
|
long ones=0, twos=0, fours=0;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = wordOffset; i <= n - 8; i+=8) {
|
||||||
|
long twosA,twosB,foursA,foursB,eights;
|
||||||
|
|
||||||
|
// CSA(twosA, ones, ones, (A[i] & B[i]), (A[i+1] & B[i+1]))
|
||||||
|
{
|
||||||
|
long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, (A[i+2] & B[i+2]), (A[i+3] & B[i+3]))
|
||||||
|
{
|
||||||
|
long b=(A[i+2] & B[i+2]), c=(A[i+3] & B[i+3]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursA, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
//CSA(twosA, ones, ones, (A[i+4] & B[i+4]), (A[i+5] & B[i+5]))
|
||||||
|
{
|
||||||
|
long b=(A[i+4] & B[i+4]), c=(A[i+5] & B[i+5]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosA=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, (A[i+6] & B[i+6]), (A[i+7] & B[i+7]))
|
||||||
|
{
|
||||||
|
long b=(A[i+6] & B[i+6]), c=(A[i+7] & B[i+7]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursB, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursB=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
|
||||||
|
//CSA(eights, fours, fours, foursA, foursB)
|
||||||
|
{
|
||||||
|
long u=fours^foursA;
|
||||||
|
eights=(fours&foursA)|(u&foursB);
|
||||||
|
fours=u^foursB;
|
||||||
|
}
|
||||||
|
tot8 += pop(eights);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (i<=n-4) {
|
||||||
|
long twosA, twosB, foursA, eights;
|
||||||
|
{
|
||||||
|
long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long b=(A[i+2] & B[i+2]), c=(A[i+3] & B[i+3]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<=n-2) {
|
||||||
|
long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
long twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
|
||||||
|
long foursA=twos&twosA;
|
||||||
|
twos=twos^twosA;
|
||||||
|
|
||||||
|
long eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<n) {
|
||||||
|
tot += pop((A[i] & B[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
tot += (pop(fours)<<2)
|
||||||
|
+ (pop(twos)<<1)
|
||||||
|
+ pop(ones)
|
||||||
|
+ (tot8<<3);
|
||||||
|
|
||||||
|
return tot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the popcount or cardinality of the union of two sets.
|
||||||
|
* Neither array is modified.
|
||||||
|
*/
|
||||||
|
public static long pop_union(long A[], long B[], int wordOffset, int numWords) {
|
||||||
|
// generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \| B[\1]\)/g'
|
||||||
|
int n = wordOffset+numWords;
|
||||||
|
long tot=0, tot8=0;
|
||||||
|
long ones=0, twos=0, fours=0;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = wordOffset; i <= n - 8; i+=8) {
|
||||||
|
/*** C macro from Hacker's Delight
|
||||||
|
#define CSA(h,l, a,b,c) \
|
||||||
|
{unsigned u = a ^ b; unsigned v = c; \
|
||||||
|
h = (a & b) | (u & v); l = u ^ v;}
|
||||||
|
***/
|
||||||
|
|
||||||
|
long twosA,twosB,foursA,foursB,eights;
|
||||||
|
|
||||||
|
// CSA(twosA, ones, ones, (A[i] | B[i]), (A[i+1] | B[i+1]))
|
||||||
|
{
|
||||||
|
long b=(A[i] | B[i]), c=(A[i+1] | B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, (A[i+2] | B[i+2]), (A[i+3] | B[i+3]))
|
||||||
|
{
|
||||||
|
long b=(A[i+2] | B[i+2]), c=(A[i+3] | B[i+3]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursA, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
//CSA(twosA, ones, ones, (A[i+4] | B[i+4]), (A[i+5] | B[i+5]))
|
||||||
|
{
|
||||||
|
long b=(A[i+4] | B[i+4]), c=(A[i+5] | B[i+5]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosA=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, (A[i+6] | B[i+6]), (A[i+7] | B[i+7]))
|
||||||
|
{
|
||||||
|
long b=(A[i+6] | B[i+6]), c=(A[i+7] | B[i+7]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursB, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursB=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
|
||||||
|
//CSA(eights, fours, fours, foursA, foursB)
|
||||||
|
{
|
||||||
|
long u=fours^foursA;
|
||||||
|
eights=(fours&foursA)|(u&foursB);
|
||||||
|
fours=u^foursB;
|
||||||
|
}
|
||||||
|
tot8 += pop(eights);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (i<=n-4) {
|
||||||
|
long twosA, twosB, foursA, eights;
|
||||||
|
{
|
||||||
|
long b=(A[i] | B[i]), c=(A[i+1] | B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long b=(A[i+2] | B[i+2]), c=(A[i+3] | B[i+3]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<=n-2) {
|
||||||
|
long b=(A[i] | B[i]), c=(A[i+1] | B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
long twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
|
||||||
|
long foursA=twos&twosA;
|
||||||
|
twos=twos^twosA;
|
||||||
|
|
||||||
|
long eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<n) {
|
||||||
|
tot += pop((A[i] | B[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
tot += (pop(fours)<<2)
|
||||||
|
+ (pop(twos)<<1)
|
||||||
|
+ pop(ones)
|
||||||
|
+ (tot8<<3);
|
||||||
|
|
||||||
|
return tot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the popcount or cardinality of A & ~B (the bits set in A but not in B).
|
||||||
|
* Neither array is modified.
|
||||||
|
*/
|
||||||
|
public static long pop_andnot(long A[], long B[], int wordOffset, int numWords) {
|
||||||
|
// generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& ~B[\1]\)/g'
|
||||||
|
int n = wordOffset+numWords;
|
||||||
|
long tot=0, tot8=0;
|
||||||
|
long ones=0, twos=0, fours=0;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = wordOffset; i <= n - 8; i+=8) {
|
||||||
|
/*** C macro from Hacker's Delight
|
||||||
|
#define CSA(h,l, a,b,c) \
|
||||||
|
{unsigned u = a ^ b; unsigned v = c; \
|
||||||
|
h = (a & b) | (u & v); l = u ^ v;}
|
||||||
|
***/
|
||||||
|
|
||||||
|
long twosA,twosB,foursA,foursB,eights;
|
||||||
|
|
||||||
|
// CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i+1] & ~B[i+1]))
|
||||||
|
{
|
||||||
|
long b=(A[i] & ~B[i]), c=(A[i+1] & ~B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, (A[i+2] & ~B[i+2]), (A[i+3] & ~B[i+3]))
|
||||||
|
{
|
||||||
|
long b=(A[i+2] & ~B[i+2]), c=(A[i+3] & ~B[i+3]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursA, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
//CSA(twosA, ones, ones, (A[i+4] & ~B[i+4]), (A[i+5] & ~B[i+5]))
|
||||||
|
{
|
||||||
|
long b=(A[i+4] & ~B[i+4]), c=(A[i+5] & ~B[i+5]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosA=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, (A[i+6] & ~B[i+6]), (A[i+7] & ~B[i+7]))
|
||||||
|
{
|
||||||
|
long b=(A[i+6] & ~B[i+6]), c=(A[i+7] & ~B[i+7]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursB, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursB=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
|
||||||
|
//CSA(eights, fours, fours, foursA, foursB)
|
||||||
|
{
|
||||||
|
long u=fours^foursA;
|
||||||
|
eights=(fours&foursA)|(u&foursB);
|
||||||
|
fours=u^foursB;
|
||||||
|
}
|
||||||
|
tot8 += pop(eights);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (i<=n-4) {
|
||||||
|
long twosA, twosB, foursA, eights;
|
||||||
|
{
|
||||||
|
long b=(A[i] & ~B[i]), c=(A[i+1] & ~B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long b=(A[i+2] & ~B[i+2]), c=(A[i+3] & ~B[i+3]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<=n-2) {
|
||||||
|
long b=(A[i] & ~B[i]), c=(A[i+1] & ~B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
long twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
|
||||||
|
long foursA=twos&twosA;
|
||||||
|
twos=twos^twosA;
|
||||||
|
|
||||||
|
long eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<n) {
|
||||||
|
tot += pop((A[i] & ~B[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
tot += (pop(fours)<<2)
|
||||||
|
+ (pop(twos)<<1)
|
||||||
|
+ pop(ones)
|
||||||
|
+ (tot8<<3);
|
||||||
|
|
||||||
|
return tot;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static long pop_xor(long A[], long B[], int wordOffset, int numWords) {
|
||||||
|
int n = wordOffset+numWords;
|
||||||
|
long tot=0, tot8=0;
|
||||||
|
long ones=0, twos=0, fours=0;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = wordOffset; i <= n - 8; i+=8) {
|
||||||
|
/*** C macro from Hacker's Delight
|
||||||
|
#define CSA(h,l, a,b,c) \
|
||||||
|
{unsigned u = a ^ b; unsigned v = c; \
|
||||||
|
h = (a & b) | (u & v); l = u ^ v;}
|
||||||
|
***/
|
||||||
|
|
||||||
|
long twosA,twosB,foursA,foursB,eights;
|
||||||
|
|
||||||
|
// CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i+1] ^ B[i+1]))
|
||||||
|
{
|
||||||
|
long b=(A[i] ^ B[i]), c=(A[i+1] ^ B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, (A[i+2] ^ B[i+2]), (A[i+3] ^ B[i+3]))
|
||||||
|
{
|
||||||
|
long b=(A[i+2] ^ B[i+2]), c=(A[i+3] ^ B[i+3]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursA, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
//CSA(twosA, ones, ones, (A[i+4] ^ B[i+4]), (A[i+5] ^ B[i+5]))
|
||||||
|
{
|
||||||
|
long b=(A[i+4] ^ B[i+4]), c=(A[i+5] ^ B[i+5]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosA=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
// CSA(twosB, ones, ones, (A[i+6] ^ B[i+6]), (A[i+7] ^ B[i+7]))
|
||||||
|
{
|
||||||
|
long b=(A[i+6] ^ B[i+6]), c=(A[i+7] ^ B[i+7]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB=(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
//CSA(foursB, twos, twos, twosA, twosB)
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursB=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
|
||||||
|
//CSA(eights, fours, fours, foursA, foursB)
|
||||||
|
{
|
||||||
|
long u=fours^foursA;
|
||||||
|
eights=(fours&foursA)|(u&foursB);
|
||||||
|
fours=u^foursB;
|
||||||
|
}
|
||||||
|
tot8 += pop(eights);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (i<=n-4) {
|
||||||
|
long twosA, twosB, foursA, eights;
|
||||||
|
{
|
||||||
|
long b=(A[i] ^ B[i]), c=(A[i+1] ^ B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long b=(A[i+2] ^ B[i+2]), c=(A[i+3] ^ B[i+3]);
|
||||||
|
long u=ones^b;
|
||||||
|
twosB =(ones&b)|(u&c);
|
||||||
|
ones=u^c;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
long u=twos^twosA;
|
||||||
|
foursA=(twos&twosA)|(u&twosB);
|
||||||
|
twos=u^twosB;
|
||||||
|
}
|
||||||
|
eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<=n-2) {
|
||||||
|
long b=(A[i] ^ B[i]), c=(A[i+1] ^ B[i+1]);
|
||||||
|
long u=ones ^ b;
|
||||||
|
long twosA=(ones & b)|( u & c);
|
||||||
|
ones=u^c;
|
||||||
|
|
||||||
|
long foursA=twos&twosA;
|
||||||
|
twos=twos^twosA;
|
||||||
|
|
||||||
|
long eights=fours&foursA;
|
||||||
|
fours=fours^foursA;
|
||||||
|
|
||||||
|
tot8 += pop(eights);
|
||||||
|
i+=2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i<n) {
|
||||||
|
tot += pop((A[i] ^ B[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
tot += (pop(fours)<<2)
|
||||||
|
+ (pop(twos)<<1)
|
||||||
|
+ pop(ones)
|
||||||
|
+ (tot8<<3);
|
||||||
|
|
||||||
|
return tot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* python code to generate ntzTable
|
||||||
|
def ntz(val):
|
||||||
|
if val==0: return 8
|
||||||
|
i=0
|
||||||
|
while (val&0x01)==0:
|
||||||
|
i = i+1
|
||||||
|
val >>= 1
|
||||||
|
return i
|
||||||
|
print ','.join([ str(ntz(i)) for i in range(256) ])
|
||||||
|
***/
|
||||||
|
/** table of number of trailing zeros in a byte */
|
||||||
|
public static final byte[] ntzTable = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
|
||||||
|
|
||||||
|
|
||||||
|
/** Returns number of trailing zeros in the 64 bit long value. */
|
||||||
|
public static int ntz(long val) {
|
||||||
|
// A full binary search to determine the low byte was slower than
|
||||||
|
// a linear search for nextSetBit(). This is most likely because
|
||||||
|
// the implementation of nextSetBit() shifts bits to the right, increasing
|
||||||
|
// the probability that the first non-zero byte is in the rhs.
|
||||||
|
//
|
||||||
|
// This implementation does a single binary search at the top level only
|
||||||
|
// so that all other bit shifting can be done on ints instead of longs to
|
||||||
|
// remain friendly to 32 bit architectures. In addition, the case of a
|
||||||
|
// non-zero first byte is checked for first because it is the most common
|
||||||
|
// in dense bit arrays.
|
||||||
|
|
||||||
|
int lower = (int)val;
|
||||||
|
int lowByte = lower & 0xff;
|
||||||
|
if (lowByte != 0) return ntzTable[lowByte];
|
||||||
|
|
||||||
|
if (lower!=0) {
|
||||||
|
lowByte = (lower>>>8) & 0xff;
|
||||||
|
if (lowByte != 0) return ntzTable[lowByte] + 8;
|
||||||
|
lowByte = (lower>>>16) & 0xff;
|
||||||
|
if (lowByte != 0) return ntzTable[lowByte] + 16;
|
||||||
|
// no need to mask off low byte for the last byte in the 32 bit word
|
||||||
|
// no need to check for zero on the last byte either.
|
||||||
|
return ntzTable[lower>>>24] + 24;
|
||||||
|
} else {
|
||||||
|
// grab upper 32 bits
|
||||||
|
int upper=(int)(val>>32);
|
||||||
|
lowByte = upper & 0xff;
|
||||||
|
if (lowByte != 0) return ntzTable[lowByte] + 32;
|
||||||
|
lowByte = (upper>>>8) & 0xff;
|
||||||
|
if (lowByte != 0) return ntzTable[lowByte] + 40;
|
||||||
|
lowByte = (upper>>>16) & 0xff;
|
||||||
|
if (lowByte != 0) return ntzTable[lowByte] + 48;
|
||||||
|
// no need to mask off low byte for the last byte in the 32 bit word
|
||||||
|
// no need to check for zero on the last byte either.
|
||||||
|
return ntzTable[upper>>>24] + 56;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns 0 based index of first set bit
|
||||||
|
* (only works for x!=0)
|
||||||
|
* <br/> This is an alternate implementation of ntz()
|
||||||
|
*/
|
||||||
|
public static int ntz2(long x) {
|
||||||
|
int n = 0;
|
||||||
|
int y = (int)x;
|
||||||
|
if (y==0) {n+=32; y = (int)(x>>>32); } // the only 64 bit shift necessary
|
||||||
|
if ((y & 0x0000FFFF) == 0) { n+=16; y>>>=16; }
|
||||||
|
if ((y & 0x000000FF) == 0) { n+=8; y>>>=8; }
|
||||||
|
return (ntzTable[ y & 0xff ]) + n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns 0 based index of first set bit
|
||||||
|
* <br/> This is an alternate implementation of ntz()
|
||||||
|
*/
|
||||||
|
public static int ntz3(long x) {
|
||||||
|
// another implementation taken from Hackers Delight, extended to 64 bits
|
||||||
|
// and converted to Java.
|
||||||
|
// Many 32 bit ntz algorithms are at http://www.hackersdelight.org/HDcode/ntz.cc
|
||||||
|
int n = 1;
|
||||||
|
|
||||||
|
// do the first step as a long, all others as ints.
|
||||||
|
int y = (int)x;
|
||||||
|
if (y==0) {n+=32; y = (int)(x>>>32); }
|
||||||
|
if ((y & 0x0000FFFF) == 0) { n+=16; y>>>=16; }
|
||||||
|
if ((y & 0x000000FF) == 0) { n+=8; y>>>=8; }
|
||||||
|
if ((y & 0x0000000F) == 0) { n+=4; y>>>=4; }
|
||||||
|
if ((y & 0x00000003) == 0) { n+=2; y>>>=2; }
|
||||||
|
return n - (y & 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** returns true if v is a power of two or zero*/
|
||||||
|
public static boolean isPowerOfTwo(int v) {
|
||||||
|
return ((v & (v-1)) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns true if v is a power of two or zero*/
|
||||||
|
public static boolean isPowerOfTwo(long v) {
|
||||||
|
return ((v & (v-1)) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns the next highest power of two, or the current value if it's already a power of two or zero*/
|
||||||
|
public static int nextHighestPowerOfTwo(int v) {
|
||||||
|
v--;
|
||||||
|
v |= v >> 1;
|
||||||
|
v |= v >> 2;
|
||||||
|
v |= v >> 4;
|
||||||
|
v |= v >> 8;
|
||||||
|
v |= v >> 16;
|
||||||
|
v++;
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns the next highest power of two, or the current value if it's already a power of two or zero*/
|
||||||
|
public static long nextHighestPowerOfTwo(long v) {
|
||||||
|
v--;
|
||||||
|
v |= v >> 1;
|
||||||
|
v |= v >> 2;
|
||||||
|
v |= v >> 4;
|
||||||
|
v |= v >> 8;
|
||||||
|
v |= v >> 16;
|
||||||
|
v |= v >> 32;
|
||||||
|
v++;
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.BitSet;
|
||||||
|
import org.apache.lucene.search.DocIdSet;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
|
||||||
|
|
||||||
|
/** Simple DocIdSet and DocIdSetIterator backed by a BitSet */
|
||||||
|
public class DocIdBitSet extends DocIdSet {
|
||||||
|
private BitSet bitSet;
|
||||||
|
|
||||||
|
public DocIdBitSet(BitSet bitSet) {
|
||||||
|
this.bitSet = bitSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
return new DocIdBitSetIterator(bitSet);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the underlying BitSet.
|
||||||
|
*/
|
||||||
|
public BitSet getBitSet() {
|
||||||
|
return this.bitSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class DocIdBitSetIterator extends DocIdSetIterator {
|
||||||
|
private int docId;
|
||||||
|
private BitSet bitSet;
|
||||||
|
|
||||||
|
DocIdBitSetIterator(BitSet bitSet) {
|
||||||
|
this.bitSet = bitSet;
|
||||||
|
this.docId = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() {
|
||||||
|
assert docId != -1;
|
||||||
|
return docId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean next() {
|
||||||
|
// (docId + 1) on next line requires -1 initial value for docNr:
|
||||||
|
return checkNextDocId(bitSet.nextSetBit(docId + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int skipDocNr) {
|
||||||
|
return checkNextDocId( bitSet.nextSetBit(skipDocNr));
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkNextDocId(int d) {
|
||||||
|
if (d == -1) { // -1 returned by BitSet.nextSetBit() when exhausted
|
||||||
|
docId = Integer.MAX_VALUE;
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
docId = d;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,773 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.DocIdSet;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
|
||||||
|
/** An "open" BitSet implementation that allows direct access to the array of words
|
||||||
|
* storing the bits.
|
||||||
|
* <p/>
|
||||||
|
* Unlike java.util.BitSet, the fact that bits are packed into an array of longs
|
||||||
|
* is part of the interface. This allows efficient implementation of other algorithms
|
||||||
|
* by someone other than the author. It also allows one to efficiently implement
|
||||||
|
* alternate serialization or interchange formats.
|
||||||
|
* <p/>
|
||||||
|
* <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most operations
|
||||||
|
* and *much* faster at calculating cardinality of sets and results of set operations.
|
||||||
|
* It can also handle sets of larger cardinality (up to 64 * 2**32-1)
|
||||||
|
* <p/>
|
||||||
|
* The goals of <code>OpenBitSet</code> are the fastest implementation possible, and
|
||||||
|
* maximum code reuse. Extra safety and encapsulation
|
||||||
|
* may always be built on top, but if that's built in, the cost can never be removed (and
|
||||||
|
* hence people re-implement their own version in order to get better performance).
|
||||||
|
* If you want a "safe", totally encapsulated (and slower and limited) BitSet
|
||||||
|
* class, use <code>java.util.BitSet</code>.
|
||||||
|
* <p/>
|
||||||
|
* <h3>Performance Results</h3>
|
||||||
|
*
|
||||||
|
Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
|
||||||
|
<br/>BitSet size = 1,000,000
|
||||||
|
<br/>Results are java.util.BitSet time divided by OpenBitSet time.
|
||||||
|
<table border="1">
|
||||||
|
<tr>
|
||||||
|
<th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> <td>1.99</td> <td>1.58</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>1% full</th> <td>3.31</td> <td>3.90</td> <td> </td> <td>1.04</td> <td> </td> <td>0.99</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
<br/>
|
||||||
|
Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
|
||||||
|
<br/>BitSet size = 1,000,000
|
||||||
|
<br/>Results are java.util.BitSet time divided by OpenBitSet time.
|
||||||
|
<table border="1">
|
||||||
|
<tr>
|
||||||
|
<th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> <td>1.12</td> <td>1.25</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>1% full</th> <td>2.51</td> <td>3.49</td> <td> </td> <td>1.00</td> <td> </td> <td>1.02</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class OpenBitSet extends DocIdSet implements Cloneable, Serializable {
|
||||||
|
protected long[] bits;
|
||||||
|
protected int wlen; // number of words (elements) used in the array
|
||||||
|
|
||||||
|
/** Constructs an OpenBitSet large enough to hold numBits.
|
||||||
|
*
|
||||||
|
* @param numBits
|
||||||
|
*/
|
||||||
|
public OpenBitSet(long numBits) {
|
||||||
|
bits = new long[bits2words(numBits)];
|
||||||
|
wlen = bits.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public OpenBitSet() {
|
||||||
|
this(64);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Constructs an OpenBitSet from an existing long[].
|
||||||
|
* <br/>
|
||||||
|
* The first 64 bits are in long[0],
|
||||||
|
* with bit index 0 at the least significant bit, and bit index 63 at the most significant.
|
||||||
|
* Given a bit index,
|
||||||
|
* the word containing it is long[index/64], and it is at bit number index%64 within that word.
|
||||||
|
* <p>
|
||||||
|
* numWords are the number of elements in the array that contain
|
||||||
|
* set bits (non-zero longs).
|
||||||
|
* numWords should be <= bits.length, and
|
||||||
|
* any existing words in the array at position >= numWords should be zero.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public OpenBitSet(long[] bits, int numWords) {
|
||||||
|
this.bits = bits;
|
||||||
|
this.wlen = numWords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
return new OpenBitSetIterator(bits, wlen);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the current capacity in bits (1 greater than the index of the last bit) */
|
||||||
|
public long capacity() { return bits.length << 6; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current capacity of this set. Included for
|
||||||
|
* compatibility. This is *not* equal to {@link #cardinality}
|
||||||
|
*/
|
||||||
|
public long size() {
|
||||||
|
return capacity();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns true if there are no set bits */
|
||||||
|
public boolean isEmpty() { return cardinality()==0; }
|
||||||
|
|
||||||
|
/** Expert: returns the long[] storing the bits */
|
||||||
|
public long[] getBits() { return bits; }
|
||||||
|
|
||||||
|
/** Expert: sets a new long[] to use as the bit storage */
|
||||||
|
public void setBits(long[] bits) { this.bits = bits; }
|
||||||
|
|
||||||
|
/** Expert: gets the number of longs in the array that are in use */
|
||||||
|
public int getNumWords() { return wlen; }
|
||||||
|
|
||||||
|
/** Expert: sets the number of longs in the array that are in use */
|
||||||
|
public void setNumWords(int nWords) { this.wlen=nWords; }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** Returns true or false for the specified bit index. */
|
||||||
|
public boolean get(int index) {
|
||||||
|
int i = index >> 6; // div 64
|
||||||
|
// signed shift will keep a negative index and force an
|
||||||
|
// array-index-out-of-bounds-exception, removing the need for an explicit check.
|
||||||
|
if (i>=bits.length) return false;
|
||||||
|
|
||||||
|
int bit = index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
return (bits[i] & bitmask) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Returns true or false for the specified bit index.
|
||||||
|
* The index should be less than the OpenBitSet size
|
||||||
|
*/
|
||||||
|
public boolean fastGet(int index) {
|
||||||
|
int i = index >> 6; // div 64
|
||||||
|
// signed shift will keep a negative index and force an
|
||||||
|
// array-index-out-of-bounds-exception, removing the need for an explicit check.
|
||||||
|
int bit = index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
return (bits[i] & bitmask) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** Returns true or false for the specified bit index
|
||||||
|
* The index should be less than the OpenBitSet size
|
||||||
|
*/
|
||||||
|
public boolean get(long index) {
|
||||||
|
int i = (int)(index >> 6); // div 64
|
||||||
|
if (i>=bits.length) return false;
|
||||||
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
return (bits[i] & bitmask) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns true or false for the specified bit index. Allows specifying
|
||||||
|
* an index outside the current size. */
|
||||||
|
public boolean fastGet(long index) {
|
||||||
|
int i = (int)(index >> 6); // div 64
|
||||||
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
return (bits[i] & bitmask) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
// alternate implementation of get()
|
||||||
|
public boolean get1(int index) {
|
||||||
|
int i = index >> 6; // div 64
|
||||||
|
int bit = index & 0x3f; // mod 64
|
||||||
|
return ((bits[i]>>>bit) & 0x01) != 0;
|
||||||
|
// this does a long shift and a bittest (on x86) vs
|
||||||
|
// a long shift, and a long AND, (the test for zero is prob a no-op)
|
||||||
|
// testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/** returns 1 if the bit is set, 0 if not.
|
||||||
|
* The index should be less than the OpenBitSet size
|
||||||
|
*/
|
||||||
|
public int getBit(int index) {
|
||||||
|
int i = index >> 6; // div 64
|
||||||
|
int bit = index & 0x3f; // mod 64
|
||||||
|
return ((int)(bits[i]>>>bit)) & 0x01;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
public boolean get2(int index) {
|
||||||
|
int word = index >> 6; // div 64
|
||||||
|
int bit = index & 0x0000003f; // mod 64
|
||||||
|
return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed
|
||||||
|
// we could right shift and check for parity bit, if it was available to us.
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** sets a bit, expanding the set size if necessary */
|
||||||
|
public void set(long index) {
|
||||||
|
int wordNum = expandingWordNum(index);
|
||||||
|
int bit = (int)index & 0x3f;
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] |= bitmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Sets the bit at the specified index.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public void fastSet(int index) {
|
||||||
|
int wordNum = index >> 6; // div 64
|
||||||
|
int bit = index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] |= bitmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sets the bit at the specified index.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public void fastSet(long index) {
|
||||||
|
int wordNum = (int)(index >> 6);
|
||||||
|
int bit = (int)index & 0x3f;
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] |= bitmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sets a range of bits, expanding the set size if necessary
|
||||||
|
*
|
||||||
|
* @param startIndex lower index
|
||||||
|
* @param endIndex one-past the last bit to set
|
||||||
|
*/
|
||||||
|
public void set(long startIndex, long endIndex) {
|
||||||
|
if (endIndex <= startIndex) return;
|
||||||
|
|
||||||
|
int startWord = (int)(startIndex>>6);
|
||||||
|
|
||||||
|
// since endIndex is one past the end, this is index of the last
|
||||||
|
// word to be changed.
|
||||||
|
int endWord = expandingWordNum(endIndex-1);
|
||||||
|
|
||||||
|
long startmask = -1L << startIndex;
|
||||||
|
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
|
||||||
|
|
||||||
|
if (startWord == endWord) {
|
||||||
|
bits[startWord] |= (startmask & endmask);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
bits[startWord] |= startmask;
|
||||||
|
Arrays.fill(bits, startWord+1, endWord, -1L);
|
||||||
|
bits[endWord] |= endmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
protected int expandingWordNum(long index) {
|
||||||
|
int wordNum = (int)(index >> 6);
|
||||||
|
if (wordNum>=wlen) {
|
||||||
|
ensureCapacity(index+1);
|
||||||
|
wlen = wordNum+1;
|
||||||
|
}
|
||||||
|
return wordNum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** clears a bit.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public void fastClear(int index) {
|
||||||
|
int wordNum = index >> 6;
|
||||||
|
int bit = index & 0x03f;
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] &= ~bitmask;
|
||||||
|
// hmmm, it takes one more instruction to clear than it does to set... any
|
||||||
|
// way to work around this? If there were only 63 bits per word, we could
|
||||||
|
// use a right shift of 10111111...111 in binary to position the 0 in the
|
||||||
|
// correct place (using sign extension).
|
||||||
|
// Could also use Long.rotateRight() or rotateLeft() *if* they were converted
|
||||||
|
// by the JVM into a native instruction.
|
||||||
|
// bits[word] &= Long.rotateLeft(0xfffffffe,bit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** clears a bit.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public void fastClear(long index) {
|
||||||
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] &= ~bitmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** clears a bit, allowing access beyond the current set size without changing the size.*/
|
||||||
|
public void clear(long index) {
|
||||||
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
|
if (wordNum>=wlen) return;
|
||||||
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] &= ~bitmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Clears a range of bits. Clearing past the end does not change the size of the set.
|
||||||
|
*
|
||||||
|
* @param startIndex lower index
|
||||||
|
* @param endIndex one-past the last bit to clear
|
||||||
|
*/
|
||||||
|
public void clear(long startIndex, long endIndex) {
|
||||||
|
if (endIndex <= startIndex) return;
|
||||||
|
|
||||||
|
int startWord = (int)(startIndex>>6);
|
||||||
|
if (startWord >= wlen) return;
|
||||||
|
|
||||||
|
// since endIndex is one past the end, this is index of the last
|
||||||
|
// word to be changed.
|
||||||
|
int endWord = (int)((endIndex-1)>>6);
|
||||||
|
|
||||||
|
long startmask = -1L << startIndex;
|
||||||
|
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
|
||||||
|
|
||||||
|
// invert masks since we are clearing
|
||||||
|
startmask = ~startmask;
|
||||||
|
endmask = ~endmask;
|
||||||
|
|
||||||
|
if (startWord == endWord) {
|
||||||
|
bits[startWord] &= (startmask | endmask);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
bits[startWord] &= startmask;
|
||||||
|
|
||||||
|
int middle = Math.min(wlen, endWord);
|
||||||
|
Arrays.fill(bits, startWord+1, middle, 0L);
|
||||||
|
if (endWord < wlen) {
|
||||||
|
bits[endWord] &= endmask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** Sets a bit and returns the previous value.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public boolean getAndSet(int index) {
|
||||||
|
int wordNum = index >> 6; // div 64
|
||||||
|
int bit = index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
boolean val = (bits[wordNum] & bitmask) != 0;
|
||||||
|
bits[wordNum] |= bitmask;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sets a bit and returns the previous value.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public boolean getAndSet(long index) {
|
||||||
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
boolean val = (bits[wordNum] & bitmask) != 0;
|
||||||
|
bits[wordNum] |= bitmask;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** flips a bit.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public void fastFlip(int index) {
|
||||||
|
int wordNum = index >> 6; // div 64
|
||||||
|
int bit = index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] ^= bitmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** flips a bit.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public void fastFlip(long index) {
|
||||||
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] ^= bitmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** flips a bit, expanding the set size if necessary */
|
||||||
|
public void flip(long index) {
|
||||||
|
int wordNum = expandingWordNum(index);
|
||||||
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] ^= bitmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** flips a bit and returns the resulting bit value.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public boolean flipAndGet(int index) {
|
||||||
|
int wordNum = index >> 6; // div 64
|
||||||
|
int bit = index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] ^= bitmask;
|
||||||
|
return (bits[wordNum] & bitmask) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** flips a bit and returns the resulting bit value.
|
||||||
|
* The index should be less than the OpenBitSet size.
|
||||||
|
*/
|
||||||
|
public boolean flipAndGet(long index) {
|
||||||
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
|
long bitmask = 1L << bit;
|
||||||
|
bits[wordNum] ^= bitmask;
|
||||||
|
return (bits[wordNum] & bitmask) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Flips a range of bits, expanding the set size if necessary
|
||||||
|
*
|
||||||
|
* @param startIndex lower index
|
||||||
|
* @param endIndex one-past the last bit to flip
|
||||||
|
*/
|
||||||
|
public void flip(long startIndex, long endIndex) {
|
||||||
|
if (endIndex <= startIndex) return;
|
||||||
|
int oldlen = wlen;
|
||||||
|
int startWord = (int)(startIndex>>6);
|
||||||
|
|
||||||
|
// since endIndex is one past the end, this is index of the last
|
||||||
|
// word to be changed.
|
||||||
|
int endWord = expandingWordNum(endIndex-1);
|
||||||
|
|
||||||
|
/*** Grrr, java shifting wraps around so -1L>>>64 == -1
|
||||||
|
* for that reason, make sure not to use endmask if the bits to flip will
|
||||||
|
* be zero in the last word (redefine endWord to be the last changed...)
|
||||||
|
long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
|
||||||
|
long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
|
||||||
|
***/
|
||||||
|
|
||||||
|
long startmask = -1L << startIndex;
|
||||||
|
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
|
||||||
|
|
||||||
|
if (startWord == endWord) {
|
||||||
|
bits[startWord] ^= (startmask & endmask);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
bits[startWord] ^= startmask;
|
||||||
|
|
||||||
|
for (int i=startWord+1; i<endWord; i++) {
|
||||||
|
bits[i] = ~bits[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
bits[endWord] ^= endmask;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
public static int pop(long v0, long v1, long v2, long v3) {
|
||||||
|
// derived from pop_array by setting last four elems to 0.
|
||||||
|
// exchanges one pop() call for 10 elementary operations
|
||||||
|
// saving about 7 instructions... is there a better way?
|
||||||
|
long twosA=v0 & v1;
|
||||||
|
long ones=v0^v1;
|
||||||
|
|
||||||
|
long u2=ones^v2;
|
||||||
|
long twosB =(ones&v2)|(u2&v3);
|
||||||
|
ones=u2^v3;
|
||||||
|
|
||||||
|
long fours=(twosA&twosB);
|
||||||
|
long twos=twosA^twosB;
|
||||||
|
|
||||||
|
return (pop(fours)<<2)
|
||||||
|
+ (pop(twos)<<1)
|
||||||
|
+ pop(ones);
|
||||||
|
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/** @return the number of set bits */
|
||||||
|
public long cardinality() {
|
||||||
|
return BitUtil.pop_array(bits,0,wlen);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the popcount or cardinality of the intersection of the two sets.
|
||||||
|
* Neither set is modified.
|
||||||
|
*/
|
||||||
|
public static long intersectionCount(OpenBitSet a, OpenBitSet b) {
|
||||||
|
return BitUtil.pop_intersect(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the popcount or cardinality of the union of the two sets.
|
||||||
|
* Neither set is modified.
|
||||||
|
*/
|
||||||
|
public static long unionCount(OpenBitSet a, OpenBitSet b) {
|
||||||
|
long tot = BitUtil.pop_union(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
|
||||||
|
if (a.wlen < b.wlen) {
|
||||||
|
tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen-a.wlen);
|
||||||
|
} else if (a.wlen > b.wlen) {
|
||||||
|
tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
|
||||||
|
}
|
||||||
|
return tot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the popcount or cardinality of "a and not b"
|
||||||
|
* or "intersection(a, not(b))".
|
||||||
|
* Neither set is modified.
|
||||||
|
*/
|
||||||
|
public static long andNotCount(OpenBitSet a, OpenBitSet b) {
|
||||||
|
long tot = BitUtil.pop_andnot(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
|
||||||
|
if (a.wlen > b.wlen) {
|
||||||
|
tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
|
||||||
|
}
|
||||||
|
return tot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the popcount or cardinality of the exclusive-or of the two sets.
|
||||||
|
* Neither set is modified.
|
||||||
|
*/
|
||||||
|
public static long xorCount(OpenBitSet a, OpenBitSet b) {
|
||||||
|
long tot = BitUtil.pop_xor(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
|
||||||
|
if (a.wlen < b.wlen) {
|
||||||
|
tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen-a.wlen);
|
||||||
|
} else if (a.wlen > b.wlen) {
|
||||||
|
tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
|
||||||
|
}
|
||||||
|
return tot;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Returns the index of the first set bit starting at the index specified.
|
||||||
|
* -1 is returned if there are no more set bits.
|
||||||
|
*/
|
||||||
|
public int nextSetBit(int index) {
|
||||||
|
int i = index>>6;
|
||||||
|
if (i>=wlen) return -1;
|
||||||
|
int subIndex = index & 0x3f; // index within the word
|
||||||
|
long word = bits[i] >> subIndex; // skip all the bits to the right of index
|
||||||
|
|
||||||
|
if (word!=0) {
|
||||||
|
return (i<<6) + subIndex + BitUtil.ntz(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
while(++i < wlen) {
|
||||||
|
word = bits[i];
|
||||||
|
if (word!=0) return (i<<6) + BitUtil.ntz(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the index of the first set bit starting at the index specified.
|
||||||
|
* -1 is returned if there are no more set bits.
|
||||||
|
*/
|
||||||
|
public long nextSetBit(long index) {
|
||||||
|
int i = (int)(index>>>6);
|
||||||
|
if (i>=wlen) return -1;
|
||||||
|
int subIndex = (int)index & 0x3f; // index within the word
|
||||||
|
long word = bits[i] >>> subIndex; // skip all the bits to the right of index
|
||||||
|
|
||||||
|
if (word!=0) {
|
||||||
|
return (((long)i)<<6) + (subIndex + BitUtil.ntz(word));
|
||||||
|
}
|
||||||
|
|
||||||
|
while(++i < wlen) {
|
||||||
|
word = bits[i];
|
||||||
|
if (word!=0) return (((long)i)<<6) + BitUtil.ntz(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public Object clone() {
|
||||||
|
try {
|
||||||
|
OpenBitSet obs = (OpenBitSet)super.clone();
|
||||||
|
obs.bits = (long[]) obs.bits.clone(); // hopefully an array clone is as fast(er) than arraycopy
|
||||||
|
return obs;
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** this = this AND other */
|
||||||
|
public void intersect(OpenBitSet other) {
|
||||||
|
int newLen= Math.min(this.wlen,other.wlen);
|
||||||
|
long[] thisArr = this.bits;
|
||||||
|
long[] otherArr = other.bits;
|
||||||
|
// testing against zero can be more efficient
|
||||||
|
int pos=newLen;
|
||||||
|
while(--pos>=0) {
|
||||||
|
thisArr[pos] &= otherArr[pos];
|
||||||
|
}
|
||||||
|
if (this.wlen > newLen) {
|
||||||
|
// fill zeros from the new shorter length to the old length
|
||||||
|
Arrays.fill(bits,newLen,this.wlen,0);
|
||||||
|
}
|
||||||
|
this.wlen = newLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** this = this OR other */
|
||||||
|
public void union(OpenBitSet other) {
|
||||||
|
int newLen = Math.max(wlen,other.wlen);
|
||||||
|
ensureCapacityWords(newLen);
|
||||||
|
|
||||||
|
long[] thisArr = this.bits;
|
||||||
|
long[] otherArr = other.bits;
|
||||||
|
int pos=Math.min(wlen,other.wlen);
|
||||||
|
while(--pos>=0) {
|
||||||
|
thisArr[pos] |= otherArr[pos];
|
||||||
|
}
|
||||||
|
if (this.wlen < newLen) {
|
||||||
|
System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, newLen-this.wlen);
|
||||||
|
}
|
||||||
|
this.wlen = newLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Remove all elements set in other. this = this AND_NOT other */
|
||||||
|
public void remove(OpenBitSet other) {
|
||||||
|
int idx = Math.min(wlen,other.wlen);
|
||||||
|
long[] thisArr = this.bits;
|
||||||
|
long[] otherArr = other.bits;
|
||||||
|
while(--idx>=0) {
|
||||||
|
thisArr[idx] &= ~otherArr[idx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** this = this XOR other */
|
||||||
|
public void xor(OpenBitSet other) {
|
||||||
|
int newLen = Math.max(wlen,other.wlen);
|
||||||
|
ensureCapacityWords(newLen);
|
||||||
|
|
||||||
|
long[] thisArr = this.bits;
|
||||||
|
long[] otherArr = other.bits;
|
||||||
|
int pos=Math.min(wlen,other.wlen);
|
||||||
|
while(--pos>=0) {
|
||||||
|
thisArr[pos] ^= otherArr[pos];
|
||||||
|
}
|
||||||
|
if (this.wlen < newLen) {
|
||||||
|
System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, newLen-this.wlen);
|
||||||
|
}
|
||||||
|
this.wlen = newLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// some BitSet compatibility methods
|
||||||
|
|
||||||
|
//** see {@link intersect} */
|
||||||
|
public void and(OpenBitSet other) {
|
||||||
|
intersect(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
//** see {@link union} */
|
||||||
|
public void or(OpenBitSet other) {
|
||||||
|
union(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
//** see {@link andNot} */
|
||||||
|
public void andNot(OpenBitSet other) {
|
||||||
|
remove(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns true if the sets have any elements in common */
|
||||||
|
public boolean intersects(OpenBitSet other) {
|
||||||
|
int pos = Math.min(this.wlen, other.wlen);
|
||||||
|
long[] thisArr = this.bits;
|
||||||
|
long[] otherArr = other.bits;
|
||||||
|
while (--pos>=0) {
|
||||||
|
if ((thisArr[pos] & otherArr[pos])!=0) return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** Expand the long[] with the size given as a number of words (64 bit longs).
|
||||||
|
* getNumWords() is unchanged by this call.
|
||||||
|
*/
|
||||||
|
public void ensureCapacityWords(int numWords) {
|
||||||
|
if (bits.length < numWords) {
|
||||||
|
long[] newBits = new long[numWords];
|
||||||
|
System.arraycopy(bits,0,newBits,0,wlen);
|
||||||
|
bits = newBits;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Ensure that the long[] is big enough to hold numBits, expanding it if necessary.
|
||||||
|
* getNumWords() is unchanged by this call.
|
||||||
|
*/
|
||||||
|
public void ensureCapacity(long numBits) {
|
||||||
|
ensureCapacityWords(bits2words(numBits));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Lowers numWords, the number of words in use,
|
||||||
|
* by checking for trailing zero words.
|
||||||
|
*/
|
||||||
|
public void trimTrailingZeros() {
|
||||||
|
int idx = wlen-1;
|
||||||
|
while (idx>=0 && bits[idx]==0) idx--;
|
||||||
|
wlen = idx+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns the number of 64 bit words it would take to hold numBits */
|
||||||
|
public static int bits2words(long numBits) {
|
||||||
|
return (int)(((numBits-1)>>>6)+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** returns true if both sets have the same bits set */
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (!(o instanceof OpenBitSet)) return false;
|
||||||
|
OpenBitSet a;
|
||||||
|
OpenBitSet b = (OpenBitSet)o;
|
||||||
|
// make a the larger set.
|
||||||
|
if (b.wlen > this.wlen) {
|
||||||
|
a = b; b=this;
|
||||||
|
} else {
|
||||||
|
a=this;
|
||||||
|
}
|
||||||
|
|
||||||
|
// check for any set bits out of the range of b
|
||||||
|
for (int i=a.wlen-1; i>=b.wlen; i--) {
|
||||||
|
if (a.bits[i]!=0) return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i=b.wlen-1; i>=0; i--) {
|
||||||
|
if (a.bits[i] != b.bits[i]) return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
long h = 0x98761234; // something non-zero for length==0
|
||||||
|
for (int i = bits.length; --i>=0;) {
|
||||||
|
h ^= bits[i];
|
||||||
|
h = (h << 1) | (h >>> 31); // rotate left
|
||||||
|
}
|
||||||
|
return (int)((h>>32) ^ h); // fold leftmost bits into right
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,173 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
|
||||||
|
/** An iterator to iterate over set bits in an OpenBitSet.
|
||||||
|
* This is faster than nextSetBit() for iterating over the complete set of bits,
|
||||||
|
* especially when the density of the bits set is high.
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class OpenBitSetIterator extends DocIdSetIterator {
|
||||||
|
|
||||||
|
// The General Idea: instead of having an array per byte that has
|
||||||
|
// the offsets of the next set bit, that array could be
|
||||||
|
// packed inside a 32 bit integer (8 4 bit numbers). That
|
||||||
|
// should be faster than accessing an array for each index, and
|
||||||
|
// the total array size is kept smaller (256*sizeof(int))=1K
|
||||||
|
protected final static int[] bitlist={
|
||||||
|
0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x854,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321
|
||||||
|
};
|
||||||
|
/***** the python code that generated bitlist
|
||||||
|
def bits2int(val):
|
||||||
|
arr=0
|
||||||
|
for shift in range(8,0,-1):
|
||||||
|
if val & 0x80:
|
||||||
|
arr = (arr << 4) | shift
|
||||||
|
val = val << 1
|
||||||
|
return arr
|
||||||
|
|
||||||
|
def int_table():
|
||||||
|
tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
|
||||||
|
return ','.join(tbl)
|
||||||
|
******/
|
||||||
|
|
||||||
|
// hmmm, what about an iterator that finds zeros though,
|
||||||
|
// or a reverse iterator... should they be separate classes
|
||||||
|
// for efficiency, or have a common root interface? (or
|
||||||
|
// maybe both? could ask for a SetBitsIterator, etc...
|
||||||
|
|
||||||
|
|
||||||
|
private final long[] arr;
|
||||||
|
private final int words;
|
||||||
|
private int i=-1;
|
||||||
|
private long word;
|
||||||
|
private int wordShift;
|
||||||
|
private int indexArray;
|
||||||
|
private int curDocId;
|
||||||
|
|
||||||
|
public OpenBitSetIterator(OpenBitSet obs) {
|
||||||
|
this(obs.getBits(), obs.getNumWords());
|
||||||
|
}
|
||||||
|
|
||||||
|
public OpenBitSetIterator(long[] bits, int numWords) {
|
||||||
|
arr = bits;
|
||||||
|
words = numWords;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 64 bit shifts
|
||||||
|
private void shift() {
|
||||||
|
if ((int)word ==0) {wordShift +=32; word = word >>>32; }
|
||||||
|
if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; }
|
||||||
|
if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; }
|
||||||
|
indexArray = bitlist[(int)word & 0xff];
|
||||||
|
}
|
||||||
|
|
||||||
|
/***** alternate shift implementations
|
||||||
|
// 32 bit shifts, but a long shift needed at the end
|
||||||
|
private void shift2() {
|
||||||
|
int y = (int)word;
|
||||||
|
if (y==0) {wordShift +=32; y = (int)(word >>>32); }
|
||||||
|
if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
|
||||||
|
if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
|
||||||
|
indexArray = bitlist[y & 0xff];
|
||||||
|
word >>>= (wordShift +1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void shift3() {
|
||||||
|
int lower = (int)word;
|
||||||
|
int lowByte = lower & 0xff;
|
||||||
|
if (lowByte != 0) {
|
||||||
|
indexArray=bitlist[lowByte];
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
shift();
|
||||||
|
}
|
||||||
|
******/
|
||||||
|
|
||||||
|
public boolean next() {
|
||||||
|
if (indexArray==0) {
|
||||||
|
if (word!=0) {
|
||||||
|
word >>>= 8;
|
||||||
|
wordShift += 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (word==0) {
|
||||||
|
if (++i >= words) {
|
||||||
|
curDocId = -1;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
word = arr[i];
|
||||||
|
wordShift =-1; // loop invariant code motion should move this
|
||||||
|
}
|
||||||
|
|
||||||
|
// after the first time, should I go with a linear search, or
|
||||||
|
// stick with the binary search in shift?
|
||||||
|
shift();
|
||||||
|
}
|
||||||
|
|
||||||
|
int bitIndex = (indexArray & 0x0f) + wordShift;
|
||||||
|
indexArray >>>= 4;
|
||||||
|
// should i<<6 be cached as a separate variable?
|
||||||
|
// it would only save one cycle in the best circumstances.
|
||||||
|
curDocId = (i<<6) + bitIndex;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) {
|
||||||
|
indexArray=0;
|
||||||
|
i = target >> 6;
|
||||||
|
if (i>=words) {
|
||||||
|
word =0; // setup so next() will also return -1
|
||||||
|
curDocId = -1;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
wordShift = target & 0x3f;
|
||||||
|
word = arr[i] >>> wordShift;
|
||||||
|
if (word !=0) {
|
||||||
|
wordShift--; // compensate for 1 based arrIndex
|
||||||
|
} else {
|
||||||
|
while (word ==0) {
|
||||||
|
if (++i >= words) {
|
||||||
|
curDocId = -1;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
word = arr[i];
|
||||||
|
}
|
||||||
|
wordShift =-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
shift();
|
||||||
|
|
||||||
|
int bitIndex = (indexArray & 0x0f) + wordShift;
|
||||||
|
indexArray >>>= 4;
|
||||||
|
// should i<<6 be cached as a separate variable?
|
||||||
|
// it would only save one cycle in the best circumstances.
|
||||||
|
curDocId = (i<<6) + bitIndex;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() {
|
||||||
|
return this.curDocId;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,218 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.DocIdSet;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store and iterate sorted integers in compressed form in RAM.
|
||||||
|
* <br>The code for compressing the differences between ascending integers was
|
||||||
|
* borrowed from {@link org.apache.lucene.store.IndexInput} and
|
||||||
|
* {@link org.apache.lucene.store.IndexOutput}.
|
||||||
|
*/
|
||||||
|
public class SortedVIntList extends DocIdSet {
|
||||||
|
/** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
|
||||||
|
* a SortedVIntList representing the index numbers of the set bits
|
||||||
|
* will be smaller than that BitSet.
|
||||||
|
*/
|
||||||
|
final static int BITS2VINTLIST_SIZE = 8;
|
||||||
|
|
||||||
|
private int size;
|
||||||
|
private byte[] bytes;
|
||||||
|
private int lastBytePos;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a SortedVIntList from all elements of an array of integers.
|
||||||
|
*
|
||||||
|
* @param sortedInts A sorted array of non negative integers.
|
||||||
|
*/
|
||||||
|
public SortedVIntList(int[] sortedInts) {
|
||||||
|
this(sortedInts, sortedInts.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a SortedVIntList from an array of integers.
|
||||||
|
* @param sortedInts An array of sorted non negative integers.
|
||||||
|
* @param inputSize The number of integers to be used from the array.
|
||||||
|
*/
|
||||||
|
public SortedVIntList(int[] sortedInts, int inputSize) {
|
||||||
|
SortedVIntListBuilder builder = new SortedVIntListBuilder();
|
||||||
|
for (int i = 0; i < inputSize; i++) {
|
||||||
|
builder.addInt(sortedInts[i]);
|
||||||
|
}
|
||||||
|
builder.done();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a SortedVIntList from a BitSet.
|
||||||
|
* @param bits A bit set representing a set of integers.
|
||||||
|
*/
|
||||||
|
public SortedVIntList(BitSet bits) {
|
||||||
|
SortedVIntListBuilder builder = new SortedVIntListBuilder();
|
||||||
|
int nextInt = bits.nextSetBit(0);
|
||||||
|
while (nextInt != -1) {
|
||||||
|
builder.addInt(nextInt);
|
||||||
|
nextInt = bits.nextSetBit(nextInt + 1);
|
||||||
|
}
|
||||||
|
builder.done();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a SortedVIntList from an OpenBitSet.
|
||||||
|
* @param bits A bit set representing a set of integers.
|
||||||
|
*/
|
||||||
|
public SortedVIntList(OpenBitSet bits) {
|
||||||
|
SortedVIntListBuilder builder = new SortedVIntListBuilder();
|
||||||
|
int nextInt = bits.nextSetBit(0);
|
||||||
|
while (nextInt != -1) {
|
||||||
|
builder.addInt(nextInt);
|
||||||
|
nextInt = bits.nextSetBit(nextInt + 1);
|
||||||
|
}
|
||||||
|
builder.done();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a SortedVIntList.
|
||||||
|
* @param docIdSetIterator An iterator providing document numbers as a set of integers.
|
||||||
|
* This DocIdSetIterator is iterated completely when this constructor
|
||||||
|
* is called and it must provide the integers in non
|
||||||
|
* decreasing order.
|
||||||
|
*/
|
||||||
|
public SortedVIntList(DocIdSetIterator docIdSetIterator) throws IOException {
|
||||||
|
SortedVIntListBuilder builder = new SortedVIntListBuilder();
|
||||||
|
while (docIdSetIterator.next()) {
|
||||||
|
builder.addInt(docIdSetIterator.doc());
|
||||||
|
}
|
||||||
|
builder.done();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private class SortedVIntListBuilder {
|
||||||
|
private int lastInt = 0;
|
||||||
|
|
||||||
|
SortedVIntListBuilder() {
|
||||||
|
initBytes();
|
||||||
|
lastInt = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void addInt(int nextInt) {
|
||||||
|
int diff = nextInt - lastInt;
|
||||||
|
if (diff < 0) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"Input not sorted or first element negative.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
|
||||||
|
// biggest possible int does not fit
|
||||||
|
resizeBytes((bytes.length * 2) + MAX_BYTES_PER_INT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// See org.apache.lucene.store.IndexOutput.writeVInt()
|
||||||
|
while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set.
|
||||||
|
bytes[lastBytePos++] = (byte) ((diff & VB1) | ~VB1);
|
||||||
|
diff >>>= BIT_SHIFT;
|
||||||
|
}
|
||||||
|
bytes[lastBytePos++] = (byte) diff; // Last byte, high bit not set.
|
||||||
|
size++;
|
||||||
|
lastInt = nextInt;
|
||||||
|
}
|
||||||
|
|
||||||
|
void done() {
|
||||||
|
resizeBytes(lastBytePos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void initBytes() {
|
||||||
|
size = 0;
|
||||||
|
bytes = new byte[128]; // initial byte size
|
||||||
|
lastBytePos = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void resizeBytes(int newSize) {
|
||||||
|
if (newSize != bytes.length) {
|
||||||
|
byte[] newBytes = new byte[newSize];
|
||||||
|
System.arraycopy(bytes, 0, newBytes, 0, lastBytePos);
|
||||||
|
bytes = newBytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int VB1 = 0x7F;
|
||||||
|
private static final int BIT_SHIFT = 7;
|
||||||
|
private final int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The total number of sorted integers.
|
||||||
|
*/
|
||||||
|
public int size() {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The size of the byte array storing the compressed sorted integers.
|
||||||
|
*/
|
||||||
|
public int getByteSize() {
|
||||||
|
return bytes.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return An iterator over the sorted integers.
|
||||||
|
*/
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
return new DocIdSetIterator() {
|
||||||
|
int bytePos = 0;
|
||||||
|
int lastInt = 0;
|
||||||
|
|
||||||
|
private void advance() {
|
||||||
|
// See org.apache.lucene.store.IndexInput.readVInt()
|
||||||
|
byte b = bytes[bytePos++];
|
||||||
|
lastInt += b & VB1;
|
||||||
|
for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT) {
|
||||||
|
b = bytes[bytePos++];
|
||||||
|
lastInt += (b & VB1) << s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() {return lastInt;}
|
||||||
|
|
||||||
|
public boolean next() {
|
||||||
|
if (bytePos >= lastBytePos) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
advance();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int docNr) {
|
||||||
|
while (bytePos < lastBytePos) {
|
||||||
|
advance();
|
||||||
|
if (lastInt >= docNr) { // No skipping to docNr available.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -43,13 +43,13 @@ public class CachingWrapperFilterHelper extends CachingWrapperFilter {
|
||||||
this.shouldHaveCache = shouldHaveCache;
|
this.shouldHaveCache = shouldHaveCache;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
if (cache == null) {
|
if (cache == null) {
|
||||||
cache = new WeakHashMap();
|
cache = new WeakHashMap();
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized (cache) { // check cache
|
synchronized (cache) { // check cache
|
||||||
BitSet cached = (BitSet) cache.get(reader);
|
DocIdSet cached = (DocIdSet) cache.get(reader);
|
||||||
if (shouldHaveCache) {
|
if (shouldHaveCache) {
|
||||||
TestCase.assertNotNull("Cache should have data ", cached);
|
TestCase.assertNotNull("Cache should have data ", cached);
|
||||||
} else {
|
} else {
|
||||||
|
@ -60,7 +60,7 @@ public class CachingWrapperFilterHelper extends CachingWrapperFilter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final BitSet bits = filter.bits(reader);
|
final DocIdSet bits = filter.getDocIdSet(reader);
|
||||||
|
|
||||||
synchronized (cache) { // update cache
|
synchronized (cache) { // update cache
|
||||||
cache.put(reader, bits);
|
cache.put(reader, bits);
|
||||||
|
|
|
@ -18,14 +18,15 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
|
||||||
public class MockFilter extends Filter {
|
public class MockFilter extends Filter {
|
||||||
private boolean wasCalled;
|
private boolean wasCalled;
|
||||||
|
|
||||||
public BitSet bits(IndexReader reader) {
|
public DocIdSet getDocIdSet(IndexReader reader) {
|
||||||
wasCalled = true;
|
wasCalled = true;
|
||||||
return new BitSet();
|
return new DocIdBitSet(new BitSet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
public void clear() {
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class RemoteCachingWrapperFilterHelper extends RemoteCachingWrapperFilter
|
||||||
this.shouldHaveCache = shouldHaveCache;
|
this.shouldHaveCache = shouldHaveCache;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
|
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
|
||||||
|
|
||||||
TestCase.assertNotNull("Filter should not be null", cachedFilter);
|
TestCase.assertNotNull("Filter should not be null", cachedFilter);
|
||||||
|
@ -55,6 +55,6 @@ public class RemoteCachingWrapperFilterHelper extends RemoteCachingWrapperFilter
|
||||||
if (filter instanceof CachingWrapperFilterHelper) {
|
if (filter instanceof CachingWrapperFilterHelper) {
|
||||||
((CachingWrapperFilterHelper)cachedFilter).setShouldHaveCache(shouldHaveCache);
|
((CachingWrapperFilterHelper)cachedFilter).setShouldHaveCache(shouldHaveCache);
|
||||||
}
|
}
|
||||||
return cachedFilter.bits(reader);
|
return cachedFilter.getDocIdSet(reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
|
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -29,9 +30,9 @@ public class SingleDocTestFilter extends Filter {
|
||||||
this.doc = doc;
|
this.doc = doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
BitSet bits = new BitSet(reader.maxDoc());
|
BitSet bits = new BitSet(reader.maxDoc());
|
||||||
bits.set(doc);
|
bits.set(doc);
|
||||||
return bits;
|
return new DocIdBitSet(bits);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,12 +36,12 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
|
||||||
CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
|
CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
|
||||||
|
|
||||||
// first time, nested filter is called
|
// first time, nested filter is called
|
||||||
cacher.bits(reader);
|
cacher.getDocIdSet(reader);
|
||||||
assertTrue("first time", filter.wasCalled());
|
assertTrue("first time", filter.wasCalled());
|
||||||
|
|
||||||
// second time, nested filter should not be called
|
// second time, nested filter should not be called
|
||||||
filter.clear();
|
filter.clear();
|
||||||
cacher.bits(reader);
|
cacher.getDocIdSet(reader);
|
||||||
assertFalse("second time", filter.wasCalled());
|
assertFalse("second time", filter.wasCalled());
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.queryParser.QueryParser;
|
||||||
import org.apache.lucene.queryParser.ParseException;
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
|
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
@ -122,12 +123,12 @@ public class TestExplanations extends LuceneTestCase {
|
||||||
public ItemizedFilter(int[] docs) {
|
public ItemizedFilter(int[] docs) {
|
||||||
this.docs = docs;
|
this.docs = docs;
|
||||||
}
|
}
|
||||||
public BitSet bits(IndexReader r) {
|
public DocIdSet getDocIdSet(IndexReader r) {
|
||||||
BitSet b = new BitSet(r.maxDoc());
|
BitSet b = new BitSet(r.maxDoc());
|
||||||
for (int i = 0; i < docs.length; i++) {
|
for (int i = 0; i < docs.length; i++) {
|
||||||
b.set(docs[i]);
|
b.set(docs[i]);
|
||||||
}
|
}
|
||||||
return b;
|
return new DocIdBitSet(b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
|
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
|
||||||
|
@ -82,11 +83,11 @@ extends LuceneTestCase {
|
||||||
// must be static for serialization tests
|
// must be static for serialization tests
|
||||||
private static Filter newStaticFilterB() {
|
private static Filter newStaticFilterB() {
|
||||||
return new Filter() {
|
return new Filter() {
|
||||||
public BitSet bits (IndexReader reader) {
|
public DocIdSet getDocIdSet (IndexReader reader) {
|
||||||
BitSet bitset = new BitSet(5);
|
BitSet bitset = new BitSet(5);
|
||||||
bitset.set (1);
|
bitset.set (1);
|
||||||
bitset.set (3);
|
bitset.set (3);
|
||||||
return bitset;
|
return new DocIdBitSet(bitset);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -150,10 +151,10 @@ extends LuceneTestCase {
|
||||||
// must be static for serialization tests
|
// must be static for serialization tests
|
||||||
private static Filter newStaticFilterA() {
|
private static Filter newStaticFilterA() {
|
||||||
return new Filter() {
|
return new Filter() {
|
||||||
public BitSet bits (IndexReader reader) {
|
public DocIdSet getDocIdSet (IndexReader reader) {
|
||||||
BitSet bitset = new BitSet(5);
|
BitSet bitset = new BitSet(5);
|
||||||
bitset.set(0, 5);
|
bitset.set(0, 5);
|
||||||
return bitset;
|
return new DocIdBitSet(bitset);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -200,3 +201,4 @@ extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,7 @@ public class TestRemoteCachingWrapperFilter extends LuceneTestCase {
|
||||||
|
|
||||||
|
|
||||||
public void testTermRemoteFilter() throws Exception {
|
public void testTermRemoteFilter() throws Exception {
|
||||||
CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))));
|
CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))));
|
||||||
|
|
||||||
// This is what we are fixing - if one uses a CachingWrapperFilter(Helper) it will never
|
// This is what we are fixing - if one uses a CachingWrapperFilter(Helper) it will never
|
||||||
// cache the filter on the remote site
|
// cache the filter on the remote site
|
||||||
|
@ -112,16 +112,16 @@ public class TestRemoteCachingWrapperFilter extends LuceneTestCase {
|
||||||
// assert that we get the same cached Filter, even if we create a new instance of RemoteCachingWrapperFilter(Helper)
|
// assert that we get the same cached Filter, even if we create a new instance of RemoteCachingWrapperFilter(Helper)
|
||||||
// this should pass because the Filter parameters are the same, and the cache uses Filter's hashCode() as cache keys,
|
// this should pass because the Filter parameters are the same, and the cache uses Filter's hashCode() as cache keys,
|
||||||
// and Filters' hashCode() builds on Filter parameters, not the Filter instance itself
|
// and Filters' hashCode() builds on Filter parameters, not the Filter instance itself
|
||||||
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false);
|
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))), false);
|
||||||
rcwfh.shouldHaveCache(false);
|
rcwfh.shouldHaveCache(false);
|
||||||
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
|
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
|
||||||
|
|
||||||
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false);
|
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))), false);
|
||||||
rcwfh.shouldHaveCache(true);
|
rcwfh.shouldHaveCache(true);
|
||||||
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
|
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
|
||||||
|
|
||||||
// assert that we get a non-cached version of the Filter because this is a new Query (type:b)
|
// assert that we get a non-cached version of the Filter because this is a new Query (type:b)
|
||||||
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "b"))), false);
|
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "b"))), false);
|
||||||
rcwfh.shouldHaveCache(false);
|
rcwfh.shouldHaveCache(false);
|
||||||
search(new TermQuery(new Term("type", "b")), rcwfh, 0, "B");
|
search(new TermQuery(new Term("type", "b")), rcwfh, 0, "B");
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,11 +116,11 @@ public class TestRemoteSearchable extends LuceneTestCase {
|
||||||
Searcher searcher = new MultiSearcher(searchables);
|
Searcher searcher = new MultiSearcher(searchables);
|
||||||
Hits hits = searcher.search(
|
Hits hits = searcher.search(
|
||||||
new TermQuery(new Term("test", "text")),
|
new TermQuery(new Term("test", "text")),
|
||||||
new QueryFilter(new TermQuery(new Term("test", "test"))));
|
new QueryWrapperFilter(new TermQuery(new Term("test", "test"))));
|
||||||
assertEquals(1, hits.length());
|
assertEquals(1, hits.length());
|
||||||
Hits nohits = searcher.search(
|
Hits nohits = searcher.search(
|
||||||
new TermQuery(new Term("test", "text")),
|
new TermQuery(new Term("test", "text")),
|
||||||
new QueryFilter(new TermQuery(new Term("test", "non-existent-term"))));
|
new QueryWrapperFilter(new TermQuery(new Term("test", "non-existent-term"))));
|
||||||
assertEquals(0, nohits.length());
|
assertEquals(0, nohits.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,7 +129,7 @@ public class TestRemoteSearchable extends LuceneTestCase {
|
||||||
Searchable[] searchables = { getRemote() };
|
Searchable[] searchables = { getRemote() };
|
||||||
Searcher searcher = new MultiSearcher(searchables);
|
Searcher searcher = new MultiSearcher(searchables);
|
||||||
Hits hits = searcher.search(
|
Hits hits = searcher.search(
|
||||||
new ConstantScoreQuery(new QueryFilter(
|
new ConstantScoreQuery(new QueryWrapperFilter(
|
||||||
new TermQuery(new Term("test", "test")))));
|
new TermQuery(new Term("test", "test")))));
|
||||||
assertEquals(1, hits.length());
|
assertEquals(1, hits.length());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package org.apache.lucene.search;
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
@ -95,16 +96,6 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
return sets;
|
return sets;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class BitSetFilter extends Filter {
|
|
||||||
public BitSet set;
|
|
||||||
public BitSetFilter(BitSet set) {
|
|
||||||
this.set = set;
|
|
||||||
}
|
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
|
||||||
return set;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class CountingHitCollector extends HitCollector {
|
public static class CountingHitCollector extends HitCollector {
|
||||||
int count=0;
|
int count=0;
|
||||||
int sum=0;
|
int sum=0;
|
||||||
|
@ -137,8 +128,12 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
|
|
||||||
|
|
||||||
BitSet addClause(BooleanQuery bq, BitSet result) {
|
BitSet addClause(BooleanQuery bq, BitSet result) {
|
||||||
BitSet rnd = sets[r.nextInt(sets.length)];
|
final BitSet rnd = sets[r.nextInt(sets.length)];
|
||||||
Query q = new ConstantScoreQuery(new BitSetFilter(rnd));
|
Query q = new ConstantScoreQuery(new Filter() {
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) {
|
||||||
|
return new DocIdBitSet(rnd);
|
||||||
|
};
|
||||||
|
});
|
||||||
bq.add(q, BooleanClause.Occur.MUST);
|
bq.add(q, BooleanClause.Occur.MUST);
|
||||||
if (validate) {
|
if (validate) {
|
||||||
if (result==null) result = (BitSet)rnd.clone();
|
if (result==null) result = (BitSet)rnd.clone();
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
@ -571,10 +572,10 @@ implements Serializable {
|
||||||
|
|
||||||
// a filter that only allows through the first hit
|
// a filter that only allows through the first hit
|
||||||
Filter filt = new Filter() {
|
Filter filt = new Filter() {
|
||||||
public BitSet bits(IndexReader reader) throws IOException {
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
BitSet bs = new BitSet(reader.maxDoc());
|
BitSet bs = new BitSet(reader.maxDoc());
|
||||||
bs.set(docs1.scoreDocs[0].doc);
|
bs.set(docs1.scoreDocs[0].doc);
|
||||||
return bs;
|
return new DocIdBitSet(bs);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -56,20 +56,36 @@ public class TestSpanQueryFilter extends LuceneTestCase {
|
||||||
SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(10).trim()));
|
SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(10).trim()));
|
||||||
SpanQueryFilter filter = new SpanQueryFilter(query);
|
SpanQueryFilter filter = new SpanQueryFilter(query);
|
||||||
SpanFilterResult result = filter.bitSpans(reader);
|
SpanFilterResult result = filter.bitSpans(reader);
|
||||||
BitSet bits = result.getBits();
|
DocIdSet docIdSet = result.getDocIdSet();
|
||||||
assertTrue("bits is null and it shouldn't be", bits != null);
|
assertTrue("docIdSet is null and it shouldn't be", docIdSet != null);
|
||||||
assertTrue("tenth bit is not on", bits.get(10));
|
assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
|
||||||
List spans = result.getPositions();
|
List spans = result.getPositions();
|
||||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||||
assertTrue("spans Size: " + spans.size() + " is not: " + bits.cardinality(), spans.size() == bits.cardinality());
|
int size = getDocIdSetSize(docIdSet);
|
||||||
|
assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size);
|
||||||
for (Iterator iterator = spans.iterator(); iterator.hasNext();) {
|
for (Iterator iterator = spans.iterator(); iterator.hasNext();) {
|
||||||
SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.next();
|
SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.next();
|
||||||
assertTrue("info is null and it shouldn't be", info != null);
|
assertTrue("info is null and it shouldn't be", info != null);
|
||||||
//The doc should indicate the bit is on
|
//The doc should indicate the bit is on
|
||||||
assertTrue("Bit is not on and it should be", bits.get(info.getDoc()));
|
assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc());
|
||||||
//There should be two positions in each
|
//There should be two positions in each
|
||||||
assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2);
|
assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2);
|
||||||
}
|
}
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int getDocIdSetSize(DocIdSet docIdSet) throws Exception {
|
||||||
|
int size = 0;
|
||||||
|
DocIdSetIterator it = docIdSet.iterator();
|
||||||
|
while (it.next()) {
|
||||||
|
size++;
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception {
|
||||||
|
DocIdSetIterator it = docIdSet.iterator();
|
||||||
|
assertTrue(msg, it.skipTo(docId));
|
||||||
|
assertTrue(msg, it.doc() == docId);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,203 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class TestOpenBitSet extends TestCase {
|
||||||
|
static Random rand = new Random();
|
||||||
|
|
||||||
|
void doGet(BitSet a, OpenBitSet b) {
|
||||||
|
int max = a.size();
|
||||||
|
for (int i=0; i<max; i++) {
|
||||||
|
if (a.get(i) != b.get(i)) {
|
||||||
|
fail("mismatch: BitSet=["+i+"]="+a.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void doNextSetBit(BitSet a, OpenBitSet b) {
|
||||||
|
int aa=-1,bb=-1;
|
||||||
|
do {
|
||||||
|
aa = a.nextSetBit(aa+1);
|
||||||
|
bb = b.nextSetBit(bb+1);
|
||||||
|
assertEquals(aa,bb);
|
||||||
|
} while (aa>=0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// test interleaving different BitSetIterator.next()
|
||||||
|
void doIterate(BitSet a, OpenBitSet b) {
|
||||||
|
int aa=-1,bb=-1;
|
||||||
|
OpenBitSetIterator iterator = new OpenBitSetIterator(b);
|
||||||
|
do {
|
||||||
|
aa = a.nextSetBit(aa+1);
|
||||||
|
if (rand.nextBoolean())
|
||||||
|
iterator.next();
|
||||||
|
else
|
||||||
|
iterator.skipTo(bb+1);
|
||||||
|
bb = iterator.doc();
|
||||||
|
assertEquals(aa,bb);
|
||||||
|
} while (aa>=0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void doRandomSets(int maxSize, int iter) {
|
||||||
|
BitSet a0=null;
|
||||||
|
OpenBitSet b0=null;
|
||||||
|
|
||||||
|
for (int i=0; i<iter; i++) {
|
||||||
|
int sz = rand.nextInt(maxSize);
|
||||||
|
BitSet a = new BitSet(sz);
|
||||||
|
OpenBitSet b = new OpenBitSet(sz);
|
||||||
|
|
||||||
|
// test the various ways of setting bits
|
||||||
|
if (sz>0) {
|
||||||
|
int nOper = rand.nextInt(sz);
|
||||||
|
for (int j=0; j<nOper; j++) {
|
||||||
|
int idx;
|
||||||
|
|
||||||
|
idx = rand.nextInt(sz);
|
||||||
|
a.set(idx);
|
||||||
|
b.fastSet(idx);
|
||||||
|
idx = rand.nextInt(sz);
|
||||||
|
a.clear(idx);
|
||||||
|
b.fastClear(idx);
|
||||||
|
idx = rand.nextInt(sz);
|
||||||
|
a.flip(idx);
|
||||||
|
b.fastFlip(idx);
|
||||||
|
|
||||||
|
boolean val = b.flipAndGet(idx);
|
||||||
|
boolean val2 = b.flipAndGet(idx);
|
||||||
|
assertTrue(val != val2);
|
||||||
|
|
||||||
|
val = b.getAndSet(idx);
|
||||||
|
assertTrue(val2 == val);
|
||||||
|
assertTrue(b.get(idx));
|
||||||
|
|
||||||
|
if (!val) b.fastClear(idx);
|
||||||
|
assertTrue(b.get(idx) == val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test that the various ways of accessing the bits are equivalent
|
||||||
|
doGet(a,b);
|
||||||
|
|
||||||
|
// test ranges, including possible extension
|
||||||
|
int fromIndex, toIndex;
|
||||||
|
fromIndex = rand.nextInt(sz+80);
|
||||||
|
toIndex = fromIndex + rand.nextInt((sz>>1)+1);
|
||||||
|
BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
|
||||||
|
OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex);
|
||||||
|
|
||||||
|
doIterate(aa,bb); // a problem here is from flip or doIterate
|
||||||
|
|
||||||
|
fromIndex = rand.nextInt(sz+80);
|
||||||
|
toIndex = fromIndex + rand.nextInt((sz>>1)+1);
|
||||||
|
aa = (BitSet)a.clone(); aa.clear(fromIndex,toIndex);
|
||||||
|
bb = (OpenBitSet)b.clone(); bb.clear(fromIndex,toIndex);
|
||||||
|
|
||||||
|
doNextSetBit(aa,bb); // a problem here is from clear() or nextSetBit
|
||||||
|
|
||||||
|
fromIndex = rand.nextInt(sz+80);
|
||||||
|
toIndex = fromIndex + rand.nextInt((sz>>1)+1);
|
||||||
|
aa = (BitSet)a.clone(); aa.set(fromIndex,toIndex);
|
||||||
|
bb = (OpenBitSet)b.clone(); bb.set(fromIndex,toIndex);
|
||||||
|
|
||||||
|
doNextSetBit(aa,bb); // a problem here is from set() or nextSetBit
|
||||||
|
|
||||||
|
|
||||||
|
if (a0 != null) {
|
||||||
|
assertEquals( a.equals(a0), b.equals(b0));
|
||||||
|
|
||||||
|
assertEquals(a.cardinality(), b.cardinality());
|
||||||
|
|
||||||
|
BitSet a_and = (BitSet)a.clone(); a_and.and(a0);
|
||||||
|
BitSet a_or = (BitSet)a.clone(); a_or.or(a0);
|
||||||
|
BitSet a_xor = (BitSet)a.clone(); a_xor.xor(a0);
|
||||||
|
BitSet a_andn = (BitSet)a.clone(); a_andn.andNot(a0);
|
||||||
|
|
||||||
|
OpenBitSet b_and = (OpenBitSet)b.clone(); assertEquals(b,b_and); b_and.and(b0);
|
||||||
|
OpenBitSet b_or = (OpenBitSet)b.clone(); b_or.or(b0);
|
||||||
|
OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0);
|
||||||
|
OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0);
|
||||||
|
|
||||||
|
doIterate(a_and,b_and);
|
||||||
|
doIterate(a_or,b_or);
|
||||||
|
doIterate(a_xor,b_xor);
|
||||||
|
doIterate(a_andn,b_andn);
|
||||||
|
|
||||||
|
assertEquals(a_and.cardinality(), b_and.cardinality());
|
||||||
|
assertEquals(a_or.cardinality(), b_or.cardinality());
|
||||||
|
assertEquals(a_xor.cardinality(), b_xor.cardinality());
|
||||||
|
assertEquals(a_andn.cardinality(), b_andn.cardinality());
|
||||||
|
|
||||||
|
// test non-mutating popcounts
|
||||||
|
assertEquals(b_and.cardinality(), OpenBitSet.intersectionCount(b,b0));
|
||||||
|
assertEquals(b_or.cardinality(), OpenBitSet.unionCount(b,b0));
|
||||||
|
assertEquals(b_xor.cardinality(), OpenBitSet.xorCount(b,b0));
|
||||||
|
assertEquals(b_andn.cardinality(), OpenBitSet.andNotCount(b,b0));
|
||||||
|
}
|
||||||
|
|
||||||
|
a0=a;
|
||||||
|
b0=b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
|
||||||
|
// larger testsuite.
|
||||||
|
public void testSmall() {
|
||||||
|
doRandomSets(1200,1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBig() {
|
||||||
|
// uncomment to run a bigger test (~2 minutes).
|
||||||
|
// doRandomSets(2000,200000);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals() {
|
||||||
|
OpenBitSet b1 = new OpenBitSet(1111);
|
||||||
|
OpenBitSet b2 = new OpenBitSet(2222);
|
||||||
|
assertTrue(b1.equals(b2));
|
||||||
|
assertTrue(b2.equals(b1));
|
||||||
|
b1.set(10);
|
||||||
|
assertFalse(b1.equals(b2));
|
||||||
|
assertFalse(b2.equals(b1));
|
||||||
|
b2.set(10);
|
||||||
|
assertTrue(b1.equals(b2));
|
||||||
|
assertTrue(b2.equals(b1));
|
||||||
|
b2.set(2221);
|
||||||
|
assertFalse(b1.equals(b2));
|
||||||
|
assertFalse(b2.equals(b1));
|
||||||
|
b1.set(2221);
|
||||||
|
assertTrue(b1.equals(b2));
|
||||||
|
assertTrue(b2.equals(b1));
|
||||||
|
|
||||||
|
// try different type of object
|
||||||
|
assertFalse(b1.equals(new Object()));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,198 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import junit.framework.TestSuite;
|
||||||
|
import junit.textui.TestRunner;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
|
||||||
|
public class TestSortedVIntList extends TestCase {
|
||||||
|
/** Main for running test case by itself. */
|
||||||
|
public static void main(String args[]) {
|
||||||
|
TestRunner.run(new TestSuite(TestSortedVIntList.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
void tstIterator (
|
||||||
|
SortedVIntList vintList,
|
||||||
|
int[] ints) throws IOException {
|
||||||
|
for (int i = 0; i < ints.length; i++) {
|
||||||
|
if ((i > 0) && (ints[i-1] == ints[i])) {
|
||||||
|
return; // DocNrSkipper should not skip to same document.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DocIdSetIterator m = vintList.iterator();
|
||||||
|
for (int i = 0; i < ints.length; i++) {
|
||||||
|
assertTrue("No end of Matcher at: " + i, m.next());
|
||||||
|
assertEquals(ints[i], m.doc());
|
||||||
|
}
|
||||||
|
assertTrue("End of Matcher", (! m.next()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void tstVIntList(
|
||||||
|
SortedVIntList vintList,
|
||||||
|
int[] ints,
|
||||||
|
int expectedByteSize) throws IOException {
|
||||||
|
assertEquals("Size", ints.length, vintList.size());
|
||||||
|
assertEquals("Byte size", expectedByteSize, vintList.getByteSize());
|
||||||
|
tstIterator(vintList, ints);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void tstViaBitSet(int [] ints, int expectedByteSize) throws IOException {
|
||||||
|
final int MAX_INT_FOR_BITSET = 1024 * 1024;
|
||||||
|
BitSet bs = new BitSet();
|
||||||
|
for (int i = 0; i < ints.length; i++) {
|
||||||
|
if (ints[i] > MAX_INT_FOR_BITSET) {
|
||||||
|
return; // BitSet takes too much memory
|
||||||
|
}
|
||||||
|
if ((i > 0) && (ints[i-1] == ints[i])) {
|
||||||
|
return; // BitSet cannot store duplicate.
|
||||||
|
}
|
||||||
|
bs.set(ints[i]);
|
||||||
|
}
|
||||||
|
SortedVIntList svil = new SortedVIntList(bs);
|
||||||
|
tstVIntList(svil, ints, expectedByteSize);
|
||||||
|
tstVIntList(new SortedVIntList(svil.iterator()), ints, expectedByteSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int VB1 = 0x7F;
|
||||||
|
private static final int BIT_SHIFT = 7;
|
||||||
|
private static final int VB2 = (VB1 << BIT_SHIFT) | VB1;
|
||||||
|
private static final int VB3 = (VB2 << BIT_SHIFT) | VB1;
|
||||||
|
private static final int VB4 = (VB3 << BIT_SHIFT) | VB1;
|
||||||
|
|
||||||
|
private int vIntByteSize(int i) {
|
||||||
|
assert i >= 0;
|
||||||
|
if (i <= VB1) return 1;
|
||||||
|
if (i <= VB2) return 2;
|
||||||
|
if (i <= VB3) return 3;
|
||||||
|
if (i <= VB4) return 4;
|
||||||
|
return 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int vIntListByteSize(int [] ints) {
|
||||||
|
int byteSize = 0;
|
||||||
|
int last = 0;
|
||||||
|
for (int i = 0; i < ints.length; i++) {
|
||||||
|
byteSize += vIntByteSize(ints[i] - last);
|
||||||
|
last = ints[i];
|
||||||
|
}
|
||||||
|
return byteSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void tstInts(int [] ints) {
|
||||||
|
int expectedByteSize = vIntListByteSize(ints);
|
||||||
|
try {
|
||||||
|
tstVIntList(new SortedVIntList(ints), ints, expectedByteSize);
|
||||||
|
tstViaBitSet(ints, expectedByteSize);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
throw new Error(ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void tstIllegalArgExc(int [] ints) {
|
||||||
|
try {
|
||||||
|
new SortedVIntList(ints);
|
||||||
|
}
|
||||||
|
catch (IllegalArgumentException e) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fail("Expected IllegalArgumentException");
|
||||||
|
}
|
||||||
|
|
||||||
|
private int[] fibArray(int a, int b, int size) {
|
||||||
|
final int[] fib = new int[size];
|
||||||
|
fib[0] = a;
|
||||||
|
fib[1] = b;
|
||||||
|
for (int i = 2; i < size; i++) {
|
||||||
|
fib[i] = fib[i-1] + fib[i-2];
|
||||||
|
}
|
||||||
|
return fib;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int[] reverseDiffs(int []ints) { // reverse the order of the successive differences
|
||||||
|
final int[] res = new int[ints.length];
|
||||||
|
for (int i = 0; i < ints.length; i++) {
|
||||||
|
res[i] = ints[ints.length - 1] + (ints[0] - ints[ints.length - 1 - i]);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test01() {
|
||||||
|
tstInts(new int[] {});
|
||||||
|
}
|
||||||
|
public void test02() {
|
||||||
|
tstInts(new int[] {0});
|
||||||
|
}
|
||||||
|
public void test03() {
|
||||||
|
tstInts(new int[] {0,Integer.MAX_VALUE});
|
||||||
|
}
|
||||||
|
public void test04a() {
|
||||||
|
tstInts(new int[] {0, VB2 - 1});
|
||||||
|
}
|
||||||
|
public void test04b() {
|
||||||
|
tstInts(new int[] {0, VB2});
|
||||||
|
}
|
||||||
|
public void test04c() {
|
||||||
|
tstInts(new int[] {0, VB2 + 1});
|
||||||
|
}
|
||||||
|
public void test05() {
|
||||||
|
tstInts(fibArray(0,1,7)); // includes duplicate value 1
|
||||||
|
}
|
||||||
|
public void test05b() {
|
||||||
|
tstInts(reverseDiffs(fibArray(0,1,7)));
|
||||||
|
}
|
||||||
|
public void test06() {
|
||||||
|
tstInts(fibArray(1,2,45)); // no duplicates, size 46 exceeds max int.
|
||||||
|
}
|
||||||
|
public void test06b() {
|
||||||
|
tstInts(reverseDiffs(fibArray(1,2,45)));
|
||||||
|
}
|
||||||
|
public void test07a() {
|
||||||
|
tstInts(new int[] {0, VB3});
|
||||||
|
}
|
||||||
|
public void test07b() {
|
||||||
|
tstInts(new int[] {1, VB3 + 2});
|
||||||
|
}
|
||||||
|
public void test07c() {
|
||||||
|
tstInts(new int[] {2, VB3 + 4});
|
||||||
|
}
|
||||||
|
public void test08a() {
|
||||||
|
tstInts(new int[] {0, VB4 + 1});
|
||||||
|
}
|
||||||
|
public void test08b() {
|
||||||
|
tstInts(new int[] {1, VB4 + 1});
|
||||||
|
}
|
||||||
|
public void test08c() {
|
||||||
|
tstInts(new int[] {2, VB4 + 1});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test10() {
|
||||||
|
tstIllegalArgExc(new int[] {-1});
|
||||||
|
}
|
||||||
|
public void test11() {
|
||||||
|
tstIllegalArgExc(new int[] {1,0});
|
||||||
|
}
|
||||||
|
public void test12() {
|
||||||
|
tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0});
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue