mirror of https://github.com/apache/lucene.git
LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd()
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@712908 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
abe6ea0828
commit
b179b038a1
|
@ -70,6 +70,9 @@ Optimizations
|
|||
more efficient (single pass) by not creating & populating an
|
||||
intermediate OpenBitSet (Paul Elschot, Mike McCandless)
|
||||
|
||||
2. LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd()
|
||||
(Paul Elschot via yonik)
|
||||
|
||||
|
||||
Documentation
|
||||
|
||||
|
|
|
@ -688,7 +688,7 @@ public class BitUtil {
|
|||
public static final byte[] ntzTable = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
|
||||
|
||||
|
||||
/** Returns number of trailing zeros in the 64 bit long value. */
|
||||
/** Returns number of trailing zeros in a 64 bit long value. */
|
||||
public static int ntz(long val) {
|
||||
// A full binary search to determine the low byte was slower than
|
||||
// a linear search for nextSetBit(). This is most likely because
|
||||
|
@ -728,6 +728,23 @@ public class BitUtil {
|
|||
}
|
||||
}
|
||||
|
||||
/** Returns number of trailing zeros in a 32 bit int value. */
|
||||
public static int ntz(int val) {
|
||||
// This implementation does a single binary search at the top level only.
|
||||
// In addition, the case of a non-zero first byte is checked for first
|
||||
// because it is the most common in dense bit arrays.
|
||||
|
||||
int lowByte = val & 0xff;
|
||||
if (lowByte != 0) return ntzTable[lowByte];
|
||||
lowByte = (val>>>8) & 0xff;
|
||||
if (lowByte != 0) return ntzTable[lowByte] + 8;
|
||||
lowByte = (val>>>16) & 0xff;
|
||||
if (lowByte != 0) return ntzTable[lowByte] + 16;
|
||||
// no need to mask off low byte for the last byte.
|
||||
// no need to check for zero on the last byte either.
|
||||
return ntzTable[val>>>24] + 24;
|
||||
}
|
||||
|
||||
/** returns 0 based index of first set bit
|
||||
* (only works for x!=0)
|
||||
* <br/> This is an alternate implementation of ntz()
|
||||
|
|
|
@ -327,6 +327,43 @@ public class OpenBitSet extends DocIdSet implements Cloneable, Serializable {
|
|||
bits[wordNum] &= ~bitmask;
|
||||
}
|
||||
|
||||
/** Clears a range of bits. Clearing past the end does not change the size of the set.
|
||||
*
|
||||
* @param startIndex lower index
|
||||
* @param endIndex one-past the last bit to clear
|
||||
*/
|
||||
public void clear(int startIndex, int endIndex) {
|
||||
if (endIndex <= startIndex) return;
|
||||
|
||||
int startWord = (startIndex>>6);
|
||||
if (startWord >= wlen) return;
|
||||
|
||||
// since endIndex is one past the end, this is index of the last
|
||||
// word to be changed.
|
||||
int endWord = ((endIndex-1)>>6);
|
||||
|
||||
long startmask = -1L << startIndex;
|
||||
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
|
||||
|
||||
// invert masks since we are clearing
|
||||
startmask = ~startmask;
|
||||
endmask = ~endmask;
|
||||
|
||||
if (startWord == endWord) {
|
||||
bits[startWord] &= (startmask | endmask);
|
||||
return;
|
||||
}
|
||||
|
||||
bits[startWord] &= startmask;
|
||||
|
||||
int middle = Math.min(wlen, endWord);
|
||||
Arrays.fill(bits, startWord+1, middle, 0L);
|
||||
if (endWord < wlen) {
|
||||
bits[endWord] &= endmask;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Clears a range of bits. Clearing past the end does not change the size of the set.
|
||||
*
|
||||
* @param startIndex lower index
|
||||
|
|
|
@ -1,101 +1,96 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
public class OpenBitSetDISI extends OpenBitSet {
|
||||
|
||||
/** Construct an OpenBitSetDISI with its bits set
|
||||
* from the doc ids of the given DocIdSetIterator.
|
||||
* Also give a maximum size one larger than the largest doc id for which a
|
||||
* bit may ever be set on this OpenBitSetDISI.
|
||||
*/
|
||||
public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException {
|
||||
super(maxSize);
|
||||
inPlaceOr(disi);
|
||||
}
|
||||
|
||||
/** Construct an OpenBitSetDISI with no bits set, and a given maximum size
|
||||
* one larger than the largest doc id for which a bit may ever be set
|
||||
* on this OpenBitSetDISI.
|
||||
*/
|
||||
public OpenBitSetDISI(int maxSize) {
|
||||
super(maxSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an inplace OR with the doc ids from a given DocIdSetIterator,
|
||||
* setting the bit for each such doc id.
|
||||
* These doc ids should be smaller than the maximum size passed to the
|
||||
* constructor.
|
||||
*/
|
||||
public void inPlaceOr(DocIdSetIterator disi) throws IOException {
|
||||
while (disi.next() && (disi.doc() < size())) {
|
||||
fastSet(disi.doc());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an inplace AND with the doc ids from a given DocIdSetIterator,
|
||||
* leaving only the bits set for which the doc ids are in common.
|
||||
* These doc ids should be smaller than the maximum size passed to the
|
||||
* constructor.
|
||||
*/
|
||||
public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
|
||||
int index = nextSetBit(0);
|
||||
int lastNotCleared = -1;
|
||||
while ((index != -1) && disi.skipTo(index)) {
|
||||
while ((index != -1) && (index < disi.doc())) {
|
||||
fastClear(index);
|
||||
index = nextSetBit(index + 1);
|
||||
}
|
||||
if (index == disi.doc()) {
|
||||
lastNotCleared = index;
|
||||
index++;
|
||||
}
|
||||
assert (index == -1) || (index > disi.doc());
|
||||
}
|
||||
clear(lastNotCleared+1, size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
|
||||
* clearing all the bits for each such doc id.
|
||||
* These doc ids should be smaller than the maximum size passed to the
|
||||
* constructor.
|
||||
*/
|
||||
public void inPlaceNot(DocIdSetIterator disi) throws IOException {
|
||||
while (disi.next() && (disi.doc() < size())) {
|
||||
fastClear(disi.doc());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
|
||||
* flipping all the bits for each such doc id.
|
||||
* These doc ids should be smaller than the maximum size passed to the
|
||||
* constructor.
|
||||
*/
|
||||
public void inPlaceXor(DocIdSetIterator disi) throws IOException {
|
||||
while (disi.next() && (disi.doc() < size())) {
|
||||
fastFlip(disi.doc());
|
||||
}
|
||||
}
|
||||
}
|
||||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
public class OpenBitSetDISI extends OpenBitSet {
|
||||
|
||||
/** Construct an OpenBitSetDISI with its bits set
|
||||
* from the doc ids of the given DocIdSetIterator.
|
||||
* Also give a maximum size one larger than the largest doc id for which a
|
||||
* bit may ever be set on this OpenBitSetDISI.
|
||||
*/
|
||||
public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException {
|
||||
super(maxSize);
|
||||
inPlaceOr(disi);
|
||||
}
|
||||
|
||||
/** Construct an OpenBitSetDISI with no bits set, and a given maximum size
|
||||
* one larger than the largest doc id for which a bit may ever be set
|
||||
* on this OpenBitSetDISI.
|
||||
*/
|
||||
public OpenBitSetDISI(int maxSize) {
|
||||
super(maxSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an inplace OR with the doc ids from a given DocIdSetIterator,
|
||||
* setting the bit for each such doc id.
|
||||
* These doc ids should be smaller than the maximum size passed to the
|
||||
* constructor.
|
||||
*/
|
||||
public void inPlaceOr(DocIdSetIterator disi) throws IOException {
|
||||
while (disi.next() && (disi.doc() < size())) {
|
||||
fastSet(disi.doc());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an inplace AND with the doc ids from a given DocIdSetIterator,
|
||||
* leaving only the bits set for which the doc ids are in common.
|
||||
* These doc ids should be smaller than the maximum size passed to the
|
||||
* constructor.
|
||||
*/
|
||||
public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
|
||||
int bitSetDoc = nextSetBit(0);
|
||||
while ((bitSetDoc != -1) && disi.skipTo(bitSetDoc)) {
|
||||
int disiDoc = disi.doc();
|
||||
clear(bitSetDoc, disiDoc);
|
||||
bitSetDoc = nextSetBit(disiDoc + 1);
|
||||
}
|
||||
if (bitSetDoc != -1) {
|
||||
clear(bitSetDoc, size());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
|
||||
* clearing all the bits for each such doc id.
|
||||
* These doc ids should be smaller than the maximum size passed to the
|
||||
* constructor.
|
||||
*/
|
||||
public void inPlaceNot(DocIdSetIterator disi) throws IOException {
|
||||
while (disi.next() && (disi.doc() < size())) {
|
||||
fastClear(disi.doc());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
|
||||
* flipping all the bits for each such doc id.
|
||||
* These doc ids should be smaller than the maximum size passed to the
|
||||
* constructor.
|
||||
*/
|
||||
public void inPlaceXor(DocIdSetIterator disi) throws IOException {
|
||||
while (disi.next() && (disi.doc() < size())) {
|
||||
fastFlip(disi.doc());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue