From b179b038a1697c87f3cf92ee9710536e2e0cf3ec Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Tue, 11 Nov 2008 01:54:49 +0000 Subject: [PATCH] LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd() git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@712908 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 3 + src/java/org/apache/lucene/util/BitUtil.java | 19 +- .../org/apache/lucene/util/OpenBitSet.java | 37 ++++ .../apache/lucene/util/OpenBitSetDISI.java | 197 +++++++++--------- 4 files changed, 154 insertions(+), 102 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 50a1adddd5c..32f71b90b23 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -70,6 +70,9 @@ Optimizations more efficient (single pass) by not creating & populating an intermediate OpenBitSet (Paul Elschot, Mike McCandless) + 2. LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd() + (Paul Elschot via yonik) + Documentation diff --git a/src/java/org/apache/lucene/util/BitUtil.java b/src/java/org/apache/lucene/util/BitUtil.java index a9b283f1964..6f1a447c150 100644 --- a/src/java/org/apache/lucene/util/BitUtil.java +++ b/src/java/org/apache/lucene/util/BitUtil.java @@ -688,7 +688,7 @@ public class BitUtil { public static final byte[] ntzTable = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}; - /** Returns number of trailing zeros in the 64 bit long value. */ + /** Returns number of trailing zeros in a 64 bit long value. */ public static int ntz(long val) { // A full binary search to determine the low byte was slower than // a linear search for nextSetBit(). This is most likely because @@ -728,6 +728,23 @@ public class BitUtil { } } + /** Returns number of trailing zeros in a 32 bit int value. */ + public static int ntz(int val) { + // This implementation does a single binary search at the top level only. + // In addition, the case of a non-zero first byte is checked for first + // because it is the most common in dense bit arrays. + + int lowByte = val & 0xff; + if (lowByte != 0) return ntzTable[lowByte]; + lowByte = (val>>>8) & 0xff; + if (lowByte != 0) return ntzTable[lowByte] + 8; + lowByte = (val>>>16) & 0xff; + if (lowByte != 0) return ntzTable[lowByte] + 16; + // no need to mask off low byte for the last byte. + // no need to check for zero on the last byte either. + return ntzTable[val>>>24] + 24; + } + /** returns 0 based index of first set bit * (only works for x!=0) *
This is an alternate implementation of ntz() diff --git a/src/java/org/apache/lucene/util/OpenBitSet.java b/src/java/org/apache/lucene/util/OpenBitSet.java index 02e1cb31c83..f3a3f5fad1e 100644 --- a/src/java/org/apache/lucene/util/OpenBitSet.java +++ b/src/java/org/apache/lucene/util/OpenBitSet.java @@ -327,6 +327,43 @@ public class OpenBitSet extends DocIdSet implements Cloneable, Serializable { bits[wordNum] &= ~bitmask; } + /** Clears a range of bits. Clearing past the end does not change the size of the set. + * + * @param startIndex lower index + * @param endIndex one-past the last bit to clear + */ + public void clear(int startIndex, int endIndex) { + if (endIndex <= startIndex) return; + + int startWord = (startIndex>>6); + if (startWord >= wlen) return; + + // since endIndex is one past the end, this is index of the last + // word to be changed. + int endWord = ((endIndex-1)>>6); + + long startmask = -1L << startIndex; + long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + // invert masks since we are clearing + startmask = ~startmask; + endmask = ~endmask; + + if (startWord == endWord) { + bits[startWord] &= (startmask | endmask); + return; + } + + bits[startWord] &= startmask; + + int middle = Math.min(wlen, endWord); + Arrays.fill(bits, startWord+1, middle, 0L); + if (endWord < wlen) { + bits[endWord] &= endmask; + } + } + + /** Clears a range of bits. Clearing past the end does not change the size of the set. * * @param startIndex lower index diff --git a/src/java/org/apache/lucene/util/OpenBitSetDISI.java b/src/java/org/apache/lucene/util/OpenBitSetDISI.java index 71dfb4f9bf6..83471ac5c0f 100644 --- a/src/java/org/apache/lucene/util/OpenBitSetDISI.java +++ b/src/java/org/apache/lucene/util/OpenBitSetDISI.java @@ -1,101 +1,96 @@ -package org.apache.lucene.util; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import org.apache.lucene.search.DocIdSetIterator; - -public class OpenBitSetDISI extends OpenBitSet { - - /** Construct an OpenBitSetDISI with its bits set - * from the doc ids of the given DocIdSetIterator. - * Also give a maximum size one larger than the largest doc id for which a - * bit may ever be set on this OpenBitSetDISI. - */ - public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException { - super(maxSize); - inPlaceOr(disi); - } - - /** Construct an OpenBitSetDISI with no bits set, and a given maximum size - * one larger than the largest doc id for which a bit may ever be set - * on this OpenBitSetDISI. - */ - public OpenBitSetDISI(int maxSize) { - super(maxSize); - } - - /** - * Perform an inplace OR with the doc ids from a given DocIdSetIterator, - * setting the bit for each such doc id. - * These doc ids should be smaller than the maximum size passed to the - * constructor. - */ - public void inPlaceOr(DocIdSetIterator disi) throws IOException { - while (disi.next() && (disi.doc() < size())) { - fastSet(disi.doc()); - } - } - - /** - * Perform an inplace AND with the doc ids from a given DocIdSetIterator, - * leaving only the bits set for which the doc ids are in common. - * These doc ids should be smaller than the maximum size passed to the - * constructor. - */ - public void inPlaceAnd(DocIdSetIterator disi) throws IOException { - int index = nextSetBit(0); - int lastNotCleared = -1; - while ((index != -1) && disi.skipTo(index)) { - while ((index != -1) && (index < disi.doc())) { - fastClear(index); - index = nextSetBit(index + 1); - } - if (index == disi.doc()) { - lastNotCleared = index; - index++; - } - assert (index == -1) || (index > disi.doc()); - } - clear(lastNotCleared+1, size()); - } - - /** - * Perform an inplace NOT with the doc ids from a given DocIdSetIterator, - * clearing all the bits for each such doc id. - * These doc ids should be smaller than the maximum size passed to the - * constructor. - */ - public void inPlaceNot(DocIdSetIterator disi) throws IOException { - while (disi.next() && (disi.doc() < size())) { - fastClear(disi.doc()); - } - } - - /** - * Perform an inplace XOR with the doc ids from a given DocIdSetIterator, - * flipping all the bits for each such doc id. - * These doc ids should be smaller than the maximum size passed to the - * constructor. - */ - public void inPlaceXor(DocIdSetIterator disi) throws IOException { - while (disi.next() && (disi.doc() < size())) { - fastFlip(disi.doc()); - } - } -} +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import org.apache.lucene.search.DocIdSetIterator; + +public class OpenBitSetDISI extends OpenBitSet { + + /** Construct an OpenBitSetDISI with its bits set + * from the doc ids of the given DocIdSetIterator. + * Also give a maximum size one larger than the largest doc id for which a + * bit may ever be set on this OpenBitSetDISI. + */ + public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException { + super(maxSize); + inPlaceOr(disi); + } + + /** Construct an OpenBitSetDISI with no bits set, and a given maximum size + * one larger than the largest doc id for which a bit may ever be set + * on this OpenBitSetDISI. + */ + public OpenBitSetDISI(int maxSize) { + super(maxSize); + } + + /** + * Perform an inplace OR with the doc ids from a given DocIdSetIterator, + * setting the bit for each such doc id. + * These doc ids should be smaller than the maximum size passed to the + * constructor. + */ + public void inPlaceOr(DocIdSetIterator disi) throws IOException { + while (disi.next() && (disi.doc() < size())) { + fastSet(disi.doc()); + } + } + + /** + * Perform an inplace AND with the doc ids from a given DocIdSetIterator, + * leaving only the bits set for which the doc ids are in common. + * These doc ids should be smaller than the maximum size passed to the + * constructor. + */ + public void inPlaceAnd(DocIdSetIterator disi) throws IOException { + int bitSetDoc = nextSetBit(0); + while ((bitSetDoc != -1) && disi.skipTo(bitSetDoc)) { + int disiDoc = disi.doc(); + clear(bitSetDoc, disiDoc); + bitSetDoc = nextSetBit(disiDoc + 1); + } + if (bitSetDoc != -1) { + clear(bitSetDoc, size()); + } + } + + /** + * Perform an inplace NOT with the doc ids from a given DocIdSetIterator, + * clearing all the bits for each such doc id. + * These doc ids should be smaller than the maximum size passed to the + * constructor. + */ + public void inPlaceNot(DocIdSetIterator disi) throws IOException { + while (disi.next() && (disi.doc() < size())) { + fastClear(disi.doc()); + } + } + + /** + * Perform an inplace XOR with the doc ids from a given DocIdSetIterator, + * flipping all the bits for each such doc id. + * These doc ids should be smaller than the maximum size passed to the + * constructor. + */ + public void inPlaceXor(DocIdSetIterator disi) throws IOException { + while (disi.next() && (disi.doc() < size())) { + fastFlip(disi.doc()); + } + } +}