mirror of https://github.com/apache/lucene.git
LUCENE-6484: Removed EliasFanoDocIdSet.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680215 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0483f4c077
commit
f1c2a62f13
|
@ -196,6 +196,9 @@ API Changes
|
|||
SpanPayloadNearCheckQuery have moved from the .spans package to the .payloads
|
||||
package. (Alan Woodward, David Smiley, Paul Elschot)
|
||||
|
||||
* LUCENE-6484: Removed EliasFanoDocIdSet, which was unused.
|
||||
(Paul Elschot via Adrien Grand)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-6413: Test runner should report the number of suites completed/
|
||||
|
|
|
@ -127,64 +127,4 @@ public final class BitUtil {
|
|||
return ((l >>> 1) ^ -(l & 1));
|
||||
}
|
||||
|
||||
|
||||
/** Select a 1-bit from a long. See also LUCENE-6040.
|
||||
* @return The index of the r-th 1 bit in x. This bit must exist.
|
||||
*/
|
||||
public static int select(long x, int r) {
|
||||
long s = x - ((x & 0xAAAAAAAAAAAAAAAAL) >>> 1); // pairwise bitsums
|
||||
s = (s & 0x3333333333333333L) + ((s >>> 2) & 0x3333333333333333L); // nibblewise bitsums
|
||||
s = ((s + (s >>> 4)) & 0x0F0F0F0F0F0F0F0FL) * L8_L; // bytewise bitsums, cumulative
|
||||
|
||||
int b = (Long.numberOfTrailingZeros((s + psOverflow[r-1]) & (L8_L << 7)) >> 3) << 3; // bit position of byte with r-th 1 bit.
|
||||
long l = r - (((s << 8) >>> b) & 0xFFL); // bit rank in byte at b
|
||||
|
||||
// Select bit l from byte (x >>> b):
|
||||
int selectIndex = (int) (((x >>> b) & 0xFFL) | ((l-1) << 8));
|
||||
int res = b + select256[selectIndex];
|
||||
return res;
|
||||
}
|
||||
|
||||
/** A long with the lowest bit of each of its 8 bytes set. */
private static final long L8_L = 0x0101010101010101L;

/**
 * Entry {@code r-1} holds the value {@code 128-r} replicated in each of the 8 bytes.
 * It is added to the cumulative byte sums in {@link #select(long,int)}, whose bit 7
 * mask then picks out the bytes that overflowed.
 */
private static final long[] psOverflow = new long[64];
static {
  for (int i = 0; i < 64; i++) {
    psOverflow[i] = (127 - i) * L8_L;
  }
}
|
||||
|
||||
private static final byte[] select256 = new byte[8 * 256];
|
||||
static {
|
||||
for (int b = 0; b <= 0xFF; b++) {
|
||||
for (int s = 1; s <= 8; s++) {
|
||||
int byteIndex = b | ((s-1) << 8);
|
||||
int bitIndex = selectNaive(b, s);
|
||||
if (bitIndex < 0) {
|
||||
bitIndex = 127; // positive as byte
|
||||
}
|
||||
assert bitIndex >= 0;
|
||||
assert ((byte) bitIndex) >= 0; // non negative as byte, no need to mask the sign
|
||||
select256[byteIndex] = (byte) bitIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Naive implementation of {@link #select(long,int)}, using {@link Long#numberOfTrailingZeros} repetitively.
|
||||
* Works relatively fast for low ranks.
|
||||
* @return The index of the r-th 1 bit in x, or -1 if no such bit exists.
|
||||
*/
|
||||
public static int selectNaive(long x, int r) {
|
||||
assert r >= 1;
|
||||
int s = -1;
|
||||
while ((x != 0L) && (r > 0)) {
|
||||
int ntz = Long.numberOfTrailingZeros(x);
|
||||
x >>>= (ntz + 1);
|
||||
s += (ntz + 1);
|
||||
r -= 1;
|
||||
}
|
||||
int res = (r > 0) ? -1 : s;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,482 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
import org.apache.lucene.util.BitUtil; // bit selection in long
|
||||
|
||||
|
||||
/** A decoder for an {@link EliasFanoEncoder}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class EliasFanoDecoder {
|
||||
private static final int LOG2_LONG_SIZE = Long.numberOfTrailingZeros(Long.SIZE);
|
||||
|
||||
private final EliasFanoEncoder efEncoder;
|
||||
private final long numEncoded;
|
||||
private long efIndex = -1; // the decoding index.
|
||||
private long setBitForIndex = -1; // the index of the high bit at the decoding index.
|
||||
|
||||
public final static long NO_MORE_VALUES = -1L;
|
||||
|
||||
private final long numIndexEntries;
|
||||
private final long indexMask;
|
||||
|
||||
/** Construct a decoder for a given {@link EliasFanoEncoder}.
|
||||
* The decoding index is set to just before the first encoded value.
|
||||
*/
|
||||
public EliasFanoDecoder(EliasFanoEncoder efEncoder) {
|
||||
this.efEncoder = efEncoder;
|
||||
this.numEncoded = efEncoder.numEncoded; // not final in EliasFanoEncoder
|
||||
this.numIndexEntries = efEncoder.currentEntryIndex; // not final in EliasFanoEncoder
|
||||
this.indexMask = (1L << efEncoder.nIndexEntryBits) - 1;
|
||||
}
|
||||
|
||||
/** @return The Elias-Fano encoder that is decoded. */
|
||||
public EliasFanoEncoder getEliasFanoEncoder() {
|
||||
return efEncoder;
|
||||
}
|
||||
|
||||
/** The number of values encoded by the encoder.
|
||||
* @return The number of values encoded by the encoder.
|
||||
*/
|
||||
public long numEncoded() {
|
||||
return numEncoded;
|
||||
}
|
||||
|
||||
|
||||
/** The current decoding index.
|
||||
* The first value encoded by {@link EliasFanoEncoder#encodeNext} has index 0.
|
||||
* Only valid directly after
|
||||
* {@link #nextValue}, {@link #advanceToValue},
|
||||
* {@link #previousValue}, or {@link #backToValue}
|
||||
* returned another value than {@link #NO_MORE_VALUES},
|
||||
* or {@link #advanceToIndex} returned true.
|
||||
* @return The decoding index of the last decoded value, or as last set by {@link #advanceToIndex}.
|
||||
*/
|
||||
public long currentIndex() {
|
||||
if (efIndex < 0) {
|
||||
throw new IllegalStateException("index before sequence");
|
||||
}
|
||||
if (efIndex >= numEncoded) {
|
||||
throw new IllegalStateException("index after sequence");
|
||||
}
|
||||
return efIndex;
|
||||
}
|
||||
|
||||
/** The value at the current decoding index.
|
||||
* Only valid when {@link #currentIndex} would return a valid result.
|
||||
* <br>This is only intended for use after {@link #advanceToIndex} returned true.
|
||||
* @return The value encoded at {@link #currentIndex}.
|
||||
*/
|
||||
public long currentValue() {
|
||||
return combineHighLowValues(currentHighValue(), currentLowValue());
|
||||
}
|
||||
|
||||
/** @return The high value for the current decoding index. */
|
||||
private long currentHighValue() {
|
||||
return setBitForIndex - efIndex; // sequence of unary gaps
|
||||
}
|
||||
|
||||
/** See also {@link EliasFanoEncoder#packValue} */
|
||||
private static long unPackValue(long[] longArray, int numBits, long packIndex, long bitsMask) {
|
||||
if (numBits == 0) {
|
||||
return 0;
|
||||
}
|
||||
long bitPos = packIndex * numBits;
|
||||
int index = (int) (bitPos >>> LOG2_LONG_SIZE);
|
||||
int bitPosAtIndex = (int) (bitPos & (Long.SIZE-1));
|
||||
long value = longArray[index] >>> bitPosAtIndex;
|
||||
if ((bitPosAtIndex + numBits) > Long.SIZE) {
|
||||
value |= (longArray[index + 1] << (Long.SIZE - bitPosAtIndex));
|
||||
}
|
||||
value &= bitsMask;
|
||||
return value;
|
||||
}
|
||||
|
||||
/** @return The low value for the current decoding index. */
|
||||
private long currentLowValue() {
|
||||
assert ((efIndex >= 0) && (efIndex < numEncoded)) : "efIndex " + efIndex;
|
||||
return unPackValue(efEncoder.lowerLongs, efEncoder.numLowBits, efIndex, efEncoder.lowerBitsMask);
|
||||
}
|
||||
|
||||
/** @return The given highValue shifted left by the number of low bits from by the EliasFanoSequence,
|
||||
* logically OR-ed with the given lowValue.
|
||||
*/
|
||||
private long combineHighLowValues(long highValue, long lowValue) {
|
||||
return (highValue << efEncoder.numLowBits) | lowValue;
|
||||
}
|
||||
|
||||
private long curHighLong;
|
||||
|
||||
|
||||
/* The implementation of forward decoding and backward decoding is done by the following method pairs.
|
||||
*
|
||||
* toBeforeSequence - toAfterSequence
|
||||
* getCurrentRightShift - getCurrentLeftShift
|
||||
* toAfterCurrentHighBit - toBeforeCurrentHighBit
|
||||
* toNextHighLong - toPreviousHighLong
|
||||
* nextHighValue - previousHighValue
|
||||
* nextValue - previousValue
|
||||
* advanceToValue - backToValue
|
||||
*
|
||||
*/
|
||||
|
||||
/* Forward decoding section */
|
||||
|
||||
|
||||
/** Set the decoding index to just before the first encoded value.
|
||||
*/
|
||||
public void toBeforeSequence() {
|
||||
efIndex = -1;
|
||||
setBitForIndex = -1;
|
||||
}
|
||||
|
||||
/** @return the number of bits in a long after (setBitForIndex modulo Long.SIZE) */
|
||||
private int getCurrentRightShift() {
|
||||
int s = (int) (setBitForIndex & (Long.SIZE-1));
|
||||
return s;
|
||||
}
|
||||
|
||||
/** Increment efIndex and setBitForIndex and
|
||||
* shift curHighLong so that it does not contain the high bits before setBitForIndex.
|
||||
* @return true iff efIndex still smaller than numEncoded.
|
||||
*/
|
||||
private boolean toAfterCurrentHighBit() {
|
||||
efIndex += 1;
|
||||
if (efIndex >= numEncoded) {
|
||||
return false;
|
||||
}
|
||||
setBitForIndex += 1;
|
||||
int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
curHighLong = efEncoder.upperLongs[highIndex] >>> getCurrentRightShift();
|
||||
return true;
|
||||
}
|
||||
|
||||
/** The current high long has been determined to not contain the set bit that is needed.
|
||||
* Increment setBitForIndex to the next high long and set curHighLong accordingly.
|
||||
*/
|
||||
private void toNextHighLong() {
|
||||
setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
|
||||
//assert getCurrentRightShift() == 0;
|
||||
int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
curHighLong = efEncoder.upperLongs[highIndex];
|
||||
}
|
||||
|
||||
/** setBitForIndex and efIndex have just been incremented, scan to the next high set bit
|
||||
* by incrementing setBitForIndex, and by setting curHighLong accordingly.
|
||||
*/
|
||||
private void toNextHighValue() {
|
||||
while (curHighLong == 0L) {
|
||||
toNextHighLong(); // inlining and unrolling would simplify somewhat
|
||||
}
|
||||
setBitForIndex += Long.numberOfTrailingZeros(curHighLong);
|
||||
}
|
||||
|
||||
/** setBitForIndex and efIndex have just been incremented, scan to the next high set bit
|
||||
* by incrementing setBitForIndex, and by setting curHighLong accordingly.
|
||||
* @return the next encoded high value.
|
||||
*/
|
||||
private long nextHighValue() {
|
||||
toNextHighValue();
|
||||
return currentHighValue();
|
||||
}
|
||||
|
||||
/** If another value is available after the current decoding index, return this value and
|
||||
* and increase the decoding index by 1. Otherwise return {@link #NO_MORE_VALUES}.
|
||||
*/
|
||||
public long nextValue() {
|
||||
if (! toAfterCurrentHighBit()) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
long highValue = nextHighValue();
|
||||
return combineHighLowValues(highValue, currentLowValue());
|
||||
}
|
||||
|
||||
/** Advance the decoding index to a given index.
|
||||
* and return <code>true</code> iff it is available.
|
||||
* <br>See also {@link #currentValue}.
|
||||
* <br>The current implementation does not use the index on the upper bit zero bit positions.
|
||||
* <br>Note: there is currently no implementation of <code>backToIndex</code>.
|
||||
*/
|
||||
public boolean advanceToIndex(long index) {
|
||||
assert index > efIndex;
|
||||
if (index >= numEncoded) {
|
||||
efIndex = numEncoded;
|
||||
return false;
|
||||
}
|
||||
if (! toAfterCurrentHighBit()) {
|
||||
assert false;
|
||||
}
|
||||
/* CHECKME: Add a (binary) search in the upperZeroBitPositions here. */
|
||||
int curSetBits = Long.bitCount(curHighLong);
|
||||
while ((efIndex + curSetBits) < index) { // curHighLong has not enough set bits to reach index
|
||||
efIndex += curSetBits;
|
||||
toNextHighLong();
|
||||
curSetBits = Long.bitCount(curHighLong);
|
||||
}
|
||||
// curHighLong has enough set bits to reach index
|
||||
while (efIndex < index) {
|
||||
/* CHECKME: Instead of the linear search here, use (forward) broadword selection from
|
||||
* "Broadword Implementation of Rank/Select Queries", Sebastiano Vigna, January 30, 2012.
|
||||
*/
|
||||
if (! toAfterCurrentHighBit()) {
|
||||
assert false;
|
||||
}
|
||||
toNextHighValue();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Given a target value, advance the decoding index to the first bigger or equal value
|
||||
* and return it if it is available. Otherwise return {@link #NO_MORE_VALUES}.
|
||||
* <br>The current implementation uses the index on the upper zero bit positions.
|
||||
*/
|
||||
public long advanceToValue(long target) {
|
||||
efIndex += 1;
|
||||
if (efIndex >= numEncoded) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
setBitForIndex += 1; // the high bit at setBitForIndex belongs to the unary code for efIndex
|
||||
|
||||
int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
long upperLong = efEncoder.upperLongs[highIndex];
|
||||
curHighLong = upperLong >>> ((int) (setBitForIndex & (Long.SIZE-1))); // may contain the unary 1 bit for efIndex
|
||||
|
||||
// determine index entry to advance to
|
||||
long highTarget = target >>> efEncoder.numLowBits;
|
||||
|
||||
long indexEntryIndex = (highTarget / efEncoder.indexInterval) - 1;
|
||||
if (indexEntryIndex >= 0) { // not before first index entry
|
||||
if (indexEntryIndex >= numIndexEntries) {
|
||||
indexEntryIndex = numIndexEntries - 1; // no further than last index entry
|
||||
}
|
||||
long indexHighValue = (indexEntryIndex + 1) * efEncoder.indexInterval;
|
||||
assert indexHighValue <= highTarget;
|
||||
if (indexHighValue > (setBitForIndex - efIndex)) { // advance to just after zero bit position of index entry.
|
||||
setBitForIndex = unPackValue(efEncoder.upperZeroBitPositionIndex, efEncoder.nIndexEntryBits, indexEntryIndex, indexMask);
|
||||
efIndex = setBitForIndex - indexHighValue; // the high bit at setBitForIndex belongs to the unary code for efIndex
|
||||
highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
upperLong = efEncoder.upperLongs[highIndex];
|
||||
curHighLong = upperLong >>> ((int) (setBitForIndex & (Long.SIZE-1))); // may contain the unary 1 bit for efIndex
|
||||
}
|
||||
assert efIndex < numEncoded; // there is a high value to be found.
|
||||
}
|
||||
|
||||
int curSetBits = Long.bitCount(curHighLong); // shifted right.
|
||||
int curClearBits = Long.SIZE - curSetBits - ((int) (setBitForIndex & (Long.SIZE-1))); // subtract right shift, may be more than encoded
|
||||
|
||||
while (((setBitForIndex - efIndex) + curClearBits) < highTarget) {
|
||||
// curHighLong has not enough clear bits to reach highTarget
|
||||
efIndex += curSetBits;
|
||||
if (efIndex >= numEncoded) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
|
||||
// highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
assert (highIndex + 1) == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
highIndex += 1;
|
||||
upperLong = efEncoder.upperLongs[highIndex];
|
||||
curHighLong = upperLong;
|
||||
curSetBits = Long.bitCount(curHighLong);
|
||||
curClearBits = Long.SIZE - curSetBits;
|
||||
}
|
||||
// curHighLong has enough clear bits to reach highTarget, and may not have enough set bits.
|
||||
while (curHighLong == 0L) {
|
||||
setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
|
||||
assert (highIndex + 1) == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
highIndex += 1;
|
||||
upperLong = efEncoder.upperLongs[highIndex];
|
||||
curHighLong = upperLong;
|
||||
}
|
||||
|
||||
// curHighLong has enough clear bits to reach highTarget, has at least 1 set bit, and may not have enough set bits.
|
||||
int rank = (int) (highTarget - (setBitForIndex - efIndex)); // the rank of the zero bit for highValue.
|
||||
assert (rank <= Long.SIZE) : ("rank " + rank);
|
||||
if (rank >= 1) {
|
||||
long invCurHighLong = ~curHighLong;
|
||||
int clearBitForValue = (rank <= 8)
|
||||
? BitUtil.selectNaive(invCurHighLong, rank)
|
||||
: BitUtil.select(invCurHighLong, rank);
|
||||
assert clearBitForValue >= 0;
|
||||
assert clearBitForValue <= Long.SIZE-1;
|
||||
setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero
|
||||
int oneBitsBeforeClearBit = clearBitForValue - rank + 1;
|
||||
efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to the unary code for efIndex
|
||||
if (efIndex >= numEncoded) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
|
||||
if ((setBitForIndex & (Long.SIZE - 1)) == 0L) { // exhausted curHighLong
|
||||
assert (highIndex + 1) == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
highIndex += 1;
|
||||
upperLong = efEncoder.upperLongs[highIndex];
|
||||
curHighLong = upperLong;
|
||||
}
|
||||
else {
|
||||
assert highIndex == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
curHighLong = upperLong >>> ((int) (setBitForIndex & (Long.SIZE-1)));
|
||||
}
|
||||
// curHighLong has enough clear bits to reach highTarget, and may not have enough set bits.
|
||||
|
||||
while (curHighLong == 0L) {
|
||||
setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
|
||||
assert (highIndex + 1) == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
highIndex += 1;
|
||||
upperLong = efEncoder.upperLongs[highIndex];
|
||||
curHighLong = upperLong;
|
||||
}
|
||||
}
|
||||
setBitForIndex += Long.numberOfTrailingZeros(curHighLong);
|
||||
assert (setBitForIndex - efIndex) >= highTarget; // highTarget reached
|
||||
|
||||
// Linear search also with low values
|
||||
long currentValue = combineHighLowValues((setBitForIndex - efIndex), currentLowValue());
|
||||
while (currentValue < target) {
|
||||
currentValue = nextValue();
|
||||
if (currentValue == NO_MORE_VALUES) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
}
|
||||
return currentValue;
|
||||
}
|
||||
|
||||
|
||||
/* Backward decoding section */
|
||||
|
||||
/** Set the decoding index to just after the last encoded value.
|
||||
*/
|
||||
public void toAfterSequence() {
|
||||
efIndex = numEncoded; // just after last index
|
||||
setBitForIndex = (efEncoder.lastEncoded >>> efEncoder.numLowBits) + numEncoded;
|
||||
}
|
||||
|
||||
/** @return the number of bits in a long before (setBitForIndex modulo Long.SIZE) */
|
||||
private int getCurrentLeftShift() {
|
||||
int s = Long.SIZE - 1 - (int) (setBitForIndex & (Long.SIZE-1));
|
||||
return s;
|
||||
}
|
||||
|
||||
/** Decrement efindex and setBitForIndex and
|
||||
* shift curHighLong so that it does not contain the high bits after setBitForIndex.
|
||||
* @return true iff efindex still {@code >= 0}
|
||||
*/
|
||||
private boolean toBeforeCurrentHighBit() {
|
||||
efIndex -= 1;
|
||||
if (efIndex < 0) {
|
||||
return false;
|
||||
}
|
||||
setBitForIndex -= 1;
|
||||
int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
curHighLong = efEncoder.upperLongs[highIndex] << getCurrentLeftShift();
|
||||
return true;
|
||||
}
|
||||
|
||||
/** The current high long has been determined to not contain the set bit that is needed.
|
||||
* Decrement setBitForIndex to the previous high long and set curHighLong accordingly.
|
||||
*/
|
||||
private void toPreviousHighLong() {
|
||||
setBitForIndex -= (setBitForIndex & (Long.SIZE-1)) + 1;
|
||||
//assert getCurrentLeftShift() == 0;
|
||||
int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
|
||||
curHighLong = efEncoder.upperLongs[highIndex];
|
||||
}
|
||||
|
||||
/** setBitForIndex and efIndex have just been decremented, scan to the previous high set bit
|
||||
* by decrementing setBitForIndex and by setting curHighLong accordingly.
|
||||
* @return the previous encoded high value.
|
||||
*/
|
||||
private long previousHighValue() {
|
||||
while (curHighLong == 0L) {
|
||||
toPreviousHighLong(); // inlining and unrolling would simplify somewhat
|
||||
}
|
||||
setBitForIndex -= Long.numberOfLeadingZeros(curHighLong);
|
||||
return currentHighValue();
|
||||
}
|
||||
|
||||
/** If another value is available before the current decoding index, return this value
|
||||
* and decrease the decoding index by 1. Otherwise return {@link #NO_MORE_VALUES}.
|
||||
*/
|
||||
public long previousValue() {
|
||||
if (! toBeforeCurrentHighBit()) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
long highValue = previousHighValue();
|
||||
return combineHighLowValues(highValue, currentLowValue());
|
||||
}
|
||||
|
||||
|
||||
/** setBitForIndex and efIndex have just been decremented, scan backward to the high set bit
|
||||
* of at most a given high value
|
||||
* by decrementing setBitForIndex and by setting curHighLong accordingly.
|
||||
* <br>The current implementation does not use the index on the upper zero bit positions.
|
||||
* @return the largest encoded high value that is at most the given one.
|
||||
*/
|
||||
private long backToHighValue(long highTarget) {
|
||||
/* CHECKME: Add using the index as in advanceToHighValue */
|
||||
int curSetBits = Long.bitCount(curHighLong); // is shifted by getCurrentLeftShift()
|
||||
int curClearBits = Long.SIZE - curSetBits - getCurrentLeftShift();
|
||||
while ((currentHighValue() - curClearBits) > highTarget) {
|
||||
// curHighLong has not enough clear bits to reach highTarget
|
||||
efIndex -= curSetBits;
|
||||
if (efIndex < 0) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
toPreviousHighLong();
|
||||
//assert getCurrentLeftShift() == 0;
|
||||
curSetBits = Long.bitCount(curHighLong);
|
||||
curClearBits = Long.SIZE - curSetBits;
|
||||
}
|
||||
// curHighLong has enough clear bits to reach highTarget, but may not have enough set bits.
|
||||
long highValue = previousHighValue();
|
||||
while (highValue > highTarget) {
|
||||
/* CHECKME: See at advanceToHighValue on using broadword bit selection. */
|
||||
if (! toBeforeCurrentHighBit()) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
highValue = previousHighValue();
|
||||
}
|
||||
return highValue;
|
||||
}
|
||||
|
||||
/** Given a target value, go back to the first smaller or equal value
|
||||
* and return it if it is available. Otherwise return {@link #NO_MORE_VALUES}.
|
||||
* <br>The current implementation does not use the index on the upper zero bit positions.
|
||||
*/
|
||||
public long backToValue(long target) {
|
||||
if (! toBeforeCurrentHighBit()) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
long highTarget = target >>> efEncoder.numLowBits;
|
||||
long highValue = backToHighValue(highTarget);
|
||||
if (highValue == NO_MORE_VALUES) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
// Linear search with low values:
|
||||
long currentValue = combineHighLowValues(highValue, currentLowValue());
|
||||
while (currentValue > target) {
|
||||
currentValue = previousValue();
|
||||
if (currentValue == NO_MORE_VALUES) {
|
||||
return NO_MORE_VALUES;
|
||||
}
|
||||
}
|
||||
return currentValue;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,130 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.FixedBitSet; // for javadocs
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
|
||||
/** A DocIdSet in Elias-Fano encoding.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class EliasFanoDocIdSet extends DocIdSet {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(EliasFanoDocIdSet.class);
|
||||
|
||||
final EliasFanoEncoder efEncoder;
|
||||
|
||||
/**
|
||||
* Construct an EliasFanoDocIdSet. For efficient encoding, the parameters should be chosen as low as possible.
|
||||
* @param numValues At least the number of document ids that will be encoded.
|
||||
* @param upperBound At least the highest document id that will be encoded.
|
||||
*/
|
||||
public EliasFanoDocIdSet(int numValues, int upperBound) {
|
||||
efEncoder = new EliasFanoEncoder(numValues, upperBound);
|
||||
}
|
||||
|
||||
/** Provide an indication that is better to use an {@link EliasFanoDocIdSet} than a {@link FixedBitSet}
|
||||
* to encode document identifiers.
|
||||
* @param numValues The number of document identifiers that is to be encoded. Should be non negative.
|
||||
* @param upperBound The maximum possible value for a document identifier. Should be at least <code>numValues</code>.
|
||||
* @return See {@link EliasFanoEncoder#sufficientlySmallerThanBitSet(long, long)}
|
||||
*/
|
||||
public static boolean sufficientlySmallerThanBitSet(long numValues, long upperBound) {
|
||||
return EliasFanoEncoder.sufficientlySmallerThanBitSet(numValues, upperBound);
|
||||
}
|
||||
|
||||
/** Encode the document ids from a DocIdSetIterator.
|
||||
* @param disi This DocIdSetIterator should provide document ids that are consistent
|
||||
* with <code>numValues</code> and <code>upperBound</code> as provided to the constructor.
|
||||
*/
|
||||
public void encodeFromDisi(DocIdSetIterator disi) throws IOException {
|
||||
while (efEncoder.numEncoded < efEncoder.numValues) {
|
||||
int x = disi.nextDoc();
|
||||
if (x == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
throw new IllegalArgumentException("disi: " + disi.toString()
|
||||
+ "\nhas " + efEncoder.numEncoded
|
||||
+ " docs, but at least " + efEncoder.numValues + " are required.");
|
||||
}
|
||||
efEncoder.encodeNext(x);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a {@link DocIdSetIterator} to access encoded document ids.
|
||||
*/
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
if (efEncoder.lastEncoded >= DocIdSetIterator.NO_MORE_DOCS) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Highest encoded value too high for DocIdSetIterator.NO_MORE_DOCS: " + efEncoder.lastEncoded);
|
||||
}
|
||||
return new DocIdSetIterator() {
|
||||
private int curDocId = -1;
|
||||
private final EliasFanoDecoder efDecoder = efEncoder.getDecoder();
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return curDocId;
|
||||
}
|
||||
|
||||
private int setCurDocID(long value) {
|
||||
curDocId = (value == EliasFanoDecoder.NO_MORE_VALUES)
|
||||
? NO_MORE_DOCS
|
||||
: (int) value;
|
||||
return curDocId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() {
|
||||
return setCurDocID(efDecoder.nextValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) {
|
||||
return setCurDocID(efDecoder.advanceToValue(target));
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return efDecoder.numEncoded();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return ((other instanceof EliasFanoDocIdSet))
|
||||
&& efEncoder.equals(((EliasFanoDocIdSet) other).efEncoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return efEncoder.hashCode() ^ getClass().hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED + efEncoder.ramBytesUsed();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,366 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.FixedBitSet; // for javadocs
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
|
||||
/** Encode a non decreasing sequence of non negative whole numbers in the Elias-Fano encoding
|
||||
* that was introduced in the 1970's by Peter Elias and Robert Fano.
|
||||
* <p>
|
||||
* The Elias-Fano encoding is a high bits / low bits representation of
|
||||
* a monotonically increasing sequence of {@code numValues > 0} natural numbers <code>x[i]</code>
|
||||
* <p>
|
||||
* {@code 0 <= x[0] <= x[1] <= ... <= x[numValues-2] <= x[numValues-1] <= upperBound}
|
||||
* <p>
|
||||
* where {@code upperBound > 0} is an upper bound on the last value.
|
||||
* <br>
|
||||
* The Elias-Fano encoding uses less than half a bit per encoded number more
|
||||
* than the smallest representation
|
||||
* that can encode any monotone sequence with the same bounds.
|
||||
* <p>
|
||||
* The lower <code>L</code> bits of each <code>x[i]</code> are stored explicitly and contiguously
|
||||
* in the lower-bits array, with <code>L</code> chosen as (<code>log()</code> base 2):
|
||||
* <p>
|
||||
* <code>L = max(0, floor(log(upperBound/numValues)))</code>
|
||||
* <p>
|
||||
* The upper bits are stored in the upper-bits array as a sequence of unary-coded gaps (<code>x[-1] = 0</code>):
|
||||
* <p>
|
||||
* <code>(x[i]/2**L) - (x[i-1]/2**L)</code>
|
||||
* <p>
|
||||
* The unary code encodes a natural number <code>n</code> by <code>n</code> 0 bits followed by a 1 bit:
|
||||
* <code>0...01</code>. <br>
|
||||
* In the upper bits the total number of 1 bits is <code>numValues</code>
|
||||
* and the total number of 0 bits is:<p>
|
||||
* {@code floor(x[numValues-1]/2**L) <= upperBound/(2**max(0, floor(log(upperBound/numValues)))) <= 2*numValues}
|
||||
* <p>
|
||||
* The Elias-Fano encoding uses at most
|
||||
* <p>
|
||||
* <code>2 + ceil(log(upperBound/numValues))</code>
|
||||
* <p>
|
||||
* bits per encoded number. With <code>upperBound</code> in these bounds (<code>p</code> is an integer):
|
||||
* <p>
|
||||
* {@code 2**p < x[numValues-1] <= upperBound <= 2**(p+1)}
|
||||
* <p>
|
||||
* the number of bits per encoded number is minimized.
|
||||
* <p>
|
||||
* In this implementation the values in the sequence can be given as <code>long</code>,
|
||||
* <code>numValues = 0</code> and <code>upperBound = 0</code> are allowed,
|
||||
* and each of the upper and lower bit arrays should fit in a <code>long[]</code>.
|
||||
* <br>
|
||||
* An index of positions of zero's in the upper bits is also built.
|
||||
* <p>
|
||||
* This implementation is based on this article:
|
||||
* <br>
|
||||
* Sebastiano Vigna, "Quasi Succinct Indices", June 19, 2012, sections 3, 4 and 9.
|
||||
* Retrieved from http://arxiv.org/pdf/1206.4300 .
|
||||
*
|
||||
* <p>The articles originally describing the Elias-Fano representation are:
|
||||
* <br>Peter Elias, "Efficient storage and retrieval by content and address of static files",
|
||||
* J. Assoc. Comput. Mach., 21(2):246–260, 1974.
|
||||
* <br>Robert M. Fano, "On the number of bits required to implement an associative memory",
|
||||
* Memorandum 61, Computer Structures Group, Project MAC, MIT, Cambridge, Mass., 1971.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
|
||||
public class EliasFanoEncoder implements Accountable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(EliasFanoEncoder.class);
|
||||
|
||||
final long numValues;
|
||||
private final long upperBound;
|
||||
final int numLowBits;
|
||||
final long lowerBitsMask;
|
||||
final long[] upperLongs;
|
||||
final long[] lowerLongs;
|
||||
private static final int LOG2_LONG_SIZE = Long.numberOfTrailingZeros(Long.SIZE);
|
||||
|
||||
long numEncoded = 0L;
|
||||
long lastEncoded = 0L;
|
||||
|
||||
/** The default index interval for zero upper bits. */
|
||||
public static final long DEFAULT_INDEX_INTERVAL = 256;
|
||||
final long numIndexEntries;
|
||||
final long indexInterval;
|
||||
final int nIndexEntryBits;
|
||||
/** upperZeroBitPositionIndex[i] (filled using packValue) will contain the bit position
|
||||
* just after the zero bit ((i+1) * indexInterval) in the upper bits.
|
||||
*/
|
||||
final long[] upperZeroBitPositionIndex;
|
||||
long currentEntryIndex; // also indicates how many entries in the index are valid.
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Construct an Elias-Fano encoder.
|
||||
* After construction, call {@link #encodeNext} <code>numValues</code> times to encode
|
||||
* a non decreasing sequence of non negative numbers.
|
||||
* @param numValues The number of values that is to be encoded.
|
||||
* @param upperBound At least the highest value that will be encoded.
|
||||
* For space efficiency this should not exceed the power of two that equals
|
||||
* or is the first higher than the actual maximum.
|
||||
* <br>When {@code numValues >= (upperBound/3)}
|
||||
* a {@link FixedBitSet} will take less space.
|
||||
* @param indexInterval The number of high zero bits for which a single index entry is built.
|
||||
* The index will have at most <code>2 * numValues / indexInterval</code> entries
|
||||
* and each index entry will use at most <code>ceil(log2(3 * numValues))</code> bits,
|
||||
* see {@link EliasFanoEncoder}.
|
||||
* @throws IllegalArgumentException when:
|
||||
* <ul>
|
||||
* <li><code>numValues</code> is negative, or
|
||||
* <li><code>numValues</code> is non negative and <code>upperBound</code> is negative, or
|
||||
* <li>the low bits do not fit in a <code>long[]</code>:
|
||||
* {@code (L * numValues / 64) > Integer.MAX_VALUE}, or
|
||||
* <li>the high bits do not fit in a <code>long[]</code>:
|
||||
* {@code (2 * numValues / 64) > Integer.MAX_VALUE}, or
|
||||
* <li>{@code indexInterval < 2},
|
||||
* <li>the index bits do not fit in a <code>long[]</code>:
|
||||
* {@code (numValues / indexInterval * ceil(2log(3 * numValues)) / 64) > Integer.MAX_VALUE}.
|
||||
* </ul>
|
||||
*/
|
||||
public EliasFanoEncoder(long numValues, long upperBound, long indexInterval) {
|
||||
if (numValues < 0L) {
|
||||
throw new IllegalArgumentException("numValues should not be negative: " + numValues);
|
||||
}
|
||||
this.numValues = numValues;
|
||||
if ((numValues > 0L) && (upperBound < 0L)) {
|
||||
throw new IllegalArgumentException("upperBound should not be negative: " + upperBound + " when numValues > 0");
|
||||
}
|
||||
this.upperBound = numValues > 0 ? upperBound : -1L; // if there is no value, -1 is the best upper bound
|
||||
int nLowBits = 0;
|
||||
if (this.numValues > 0) { // nLowBits = max(0; floor(2log(upperBound/numValues)))
|
||||
long lowBitsFac = this.upperBound / this.numValues;
|
||||
if (lowBitsFac > 0) {
|
||||
nLowBits = 63 - Long.numberOfLeadingZeros(lowBitsFac); // see Long.numberOfLeadingZeros javadocs
|
||||
}
|
||||
}
|
||||
this.numLowBits = nLowBits;
|
||||
this.lowerBitsMask = Long.MAX_VALUE >>> (Long.SIZE - 1 - this.numLowBits);
|
||||
|
||||
long numLongsForLowBits = numLongsForBits(numValues * numLowBits);
|
||||
if (numLongsForLowBits > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("numLongsForLowBits too large to index a long array: " + numLongsForLowBits);
|
||||
}
|
||||
this.lowerLongs = new long[(int) numLongsForLowBits];
|
||||
|
||||
long numHighBitsClear = ((this.upperBound > 0) ? this.upperBound : 0) >>> this.numLowBits;
|
||||
assert numHighBitsClear <= (2 * this.numValues);
|
||||
long numHighBitsSet = this.numValues;
|
||||
|
||||
long numLongsForHighBits = numLongsForBits(numHighBitsClear + numHighBitsSet);
|
||||
if (numLongsForHighBits > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("numLongsForHighBits too large to index a long array: " + numLongsForHighBits);
|
||||
}
|
||||
this.upperLongs = new long[(int) numLongsForHighBits];
|
||||
if (indexInterval < 2) {
|
||||
throw new IllegalArgumentException("indexInterval should at least 2: " + indexInterval);
|
||||
}
|
||||
// For the index:
|
||||
long maxHighValue = upperBound >>> this.numLowBits;
|
||||
long nIndexEntries = maxHighValue / indexInterval; // no zero value index entry
|
||||
this.numIndexEntries = (nIndexEntries >= 0) ? nIndexEntries : 0;
|
||||
long maxIndexEntry = maxHighValue + numValues - 1; // clear upper bits, set upper bits, start at zero
|
||||
this.nIndexEntryBits = (maxIndexEntry <= 0) ? 0
|
||||
: (64 - Long.numberOfLeadingZeros(maxIndexEntry));
|
||||
long numLongsForIndexBits = numLongsForBits(numIndexEntries * nIndexEntryBits);
|
||||
if (numLongsForIndexBits > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("numLongsForIndexBits too large to index a long array: " + numLongsForIndexBits);
|
||||
}
|
||||
this.upperZeroBitPositionIndex = new long[(int) numLongsForIndexBits];
|
||||
this.currentEntryIndex = 0;
|
||||
this.indexInterval = indexInterval;
|
||||
}
|
||||
|
||||
/**
 * Construct an Elias-Fano encoder whose index on the upper zero bits
 * uses {@link #DEFAULT_INDEX_INTERVAL}.
 * @param numValues The number of values that is to be encoded.
 * @param upperBound At least the highest value that will be encoded.
 */
public EliasFanoEncoder(long numValues, long upperBound) {
  // Delegates all validation and allocation to the three argument constructor.
  this(numValues, upperBound, DEFAULT_INDEX_INTERVAL);
}
|
||||
|
||||
private static long numLongsForBits(long numBits) { // Note: int version in FixedBitSet.bits2words()
|
||||
assert numBits >= 0 : numBits;
|
||||
return (numBits + (Long.SIZE-1)) >>> LOG2_LONG_SIZE;
|
||||
}
|
||||
|
||||
/** Call at most <code>numValues</code> times to encode a non decreasing sequence of non negative numbers.
|
||||
* @param x The next number to be encoded.
|
||||
* @throws IllegalStateException when called more than <code>numValues</code> times.
|
||||
* @throws IllegalArgumentException when:
|
||||
* <ul>
|
||||
* <li><code>x</code> is smaller than an earlier encoded value, or
|
||||
* <li><code>x</code> is larger than <code>upperBound</code>.
|
||||
* </ul>
|
||||
*/
|
||||
public void encodeNext(long x) {
|
||||
if (numEncoded >= numValues) {
|
||||
throw new IllegalStateException("encodeNext called more than " + numValues + " times.");
|
||||
}
|
||||
if (lastEncoded > x) {
|
||||
throw new IllegalArgumentException(x + " smaller than previous " + lastEncoded);
|
||||
}
|
||||
if (x > upperBound) {
|
||||
throw new IllegalArgumentException(x + " larger than upperBound " + upperBound);
|
||||
}
|
||||
long highValue = x >>> numLowBits;
|
||||
encodeUpperBits(highValue);
|
||||
encodeLowerBits(x & lowerBitsMask);
|
||||
lastEncoded = x;
|
||||
// Add index entries:
|
||||
long indexValue = (currentEntryIndex + 1) * indexInterval;
|
||||
while (indexValue <= highValue) {
|
||||
long afterZeroBitPosition = indexValue + numEncoded;
|
||||
packValue(afterZeroBitPosition, upperZeroBitPositionIndex, nIndexEntryBits, currentEntryIndex);
|
||||
currentEntryIndex += 1;
|
||||
indexValue += indexInterval;
|
||||
}
|
||||
numEncoded++;
|
||||
}
|
||||
|
||||
private void encodeUpperBits(long highValue) {
|
||||
long nextHighBitNum = numEncoded + highValue; // sequence of unary gaps
|
||||
upperLongs[(int)(nextHighBitNum >>> LOG2_LONG_SIZE)] |= (1L << (nextHighBitNum & (Long.SIZE-1)));
|
||||
}
|
||||
|
||||
private void encodeLowerBits(long lowValue) {
|
||||
packValue(lowValue, lowerLongs, numLowBits, numEncoded);
|
||||
}
|
||||
|
||||
private static void packValue(long value, long[] longArray, int numBits, long packIndex) {
|
||||
if (numBits != 0) {
|
||||
long bitPos = numBits * packIndex;
|
||||
int index = (int) (bitPos >>> LOG2_LONG_SIZE);
|
||||
int bitPosAtIndex = (int) (bitPos & (Long.SIZE-1));
|
||||
longArray[index] |= (value << bitPosAtIndex);
|
||||
if ((bitPosAtIndex + numBits) > Long.SIZE) {
|
||||
longArray[index+1] = (value >>> (Long.SIZE - bitPosAtIndex));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Provide an indication that it is better to use an {@link EliasFanoEncoder} than a {@link FixedBitSet}
|
||||
* to encode document identifiers.
|
||||
* This indication is not precise and may change in the future.
|
||||
* <br>An EliasFanoEncoder is favoured when the size of the encoding by the EliasFanoEncoder
|
||||
* (including some space for its index) is at most about 5/6 of the size of the FixedBitSet,
|
||||
* this is the same as comparing estimates of the number of bits accessed by a pair of FixedBitSets and
|
||||
* by a pair of non indexed EliasFanoDocIdSets when determining the intersections of the pairs.
|
||||
* <br>A bit set is preferred when {@code upperbound <= 256}.
|
||||
* <br>It is assumed that {@link #DEFAULT_INDEX_INTERVAL} is used.
|
||||
* @param numValues The number of document identifiers that is to be encoded. Should be non negative.
|
||||
* @param upperBound The maximum possible value for a document identifier. Should be at least <code>numValues</code>.
|
||||
*/
|
||||
public static boolean sufficientlySmallerThanBitSet(long numValues, long upperBound) {
|
||||
/* When (upperBound / 6) == numValues,
|
||||
* the number of bits per entry for the EliasFanoEncoder is 2 + ceil(2log(upperBound/numValues)) == 5.
|
||||
*
|
||||
* For intersecting two bit sets upperBound bits are accessed, roughly half of one, half of the other.
|
||||
* For intersecting two EliasFano sequences without index on the upper bits,
|
||||
* all (2 * 3 * numValues) upper bits are accessed.
|
||||
*/
|
||||
return (upperBound > (4 * Long.SIZE)) // prefer a bit set when it takes no more than 4 longs.
|
||||
&& (upperBound / 7) > numValues; // 6 + 1 to allow some room for the index.
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an {@link EliasFanoDecoder} to access the encoded values.
|
||||
* Perform all calls to {@link #encodeNext} before calling {@link #getDecoder}.
|
||||
*/
|
||||
public EliasFanoDecoder getDecoder() {
|
||||
// decode as far as currently encoded as determined by numEncoded.
|
||||
return new EliasFanoDecoder(this);
|
||||
}
|
||||
|
||||
/** Expert. The low bits. */
|
||||
public long[] getLowerBits() {
|
||||
return lowerLongs;
|
||||
}
|
||||
|
||||
/** Expert. The high bits. */
|
||||
public long[] getUpperBits() {
|
||||
return upperLongs;
|
||||
}
|
||||
|
||||
/** Expert. The index bits. */
|
||||
public long[] getIndexBits() {
|
||||
return upperZeroBitPositionIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder("EliasFanoSequence");
|
||||
s.append(" numValues " + numValues);
|
||||
s.append(" numEncoded " + numEncoded);
|
||||
s.append(" upperBound " + upperBound);
|
||||
s.append(" lastEncoded " + lastEncoded);
|
||||
s.append(" numLowBits " + numLowBits);
|
||||
s.append("\nupperLongs[" + upperLongs.length + "]");
|
||||
for (int i = 0; i < upperLongs.length; i++) {
|
||||
s.append(" " + ToStringUtils.longHex(upperLongs[i]));
|
||||
}
|
||||
s.append("\nlowerLongs[" + lowerLongs.length + "]");
|
||||
for (int i = 0; i < lowerLongs.length; i++) {
|
||||
s.append(" " + ToStringUtils.longHex(lowerLongs[i]));
|
||||
}
|
||||
s.append("\nindexInterval: " + indexInterval + ", nIndexEntryBits: " + nIndexEntryBits);
|
||||
s.append("\nupperZeroBitPositionIndex[" + upperZeroBitPositionIndex.length + "]");
|
||||
for (int i = 0; i < upperZeroBitPositionIndex.length; i++) {
|
||||
s.append(" " + ToStringUtils.longHex(upperZeroBitPositionIndex[i]));
|
||||
}
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (! (other instanceof EliasFanoEncoder)) {
|
||||
return false;
|
||||
}
|
||||
EliasFanoEncoder oefs = (EliasFanoEncoder) other;
|
||||
// no equality needed for upperBound
|
||||
return (this.numValues == oefs.numValues)
|
||||
&& (this.numEncoded == oefs.numEncoded)
|
||||
&& (this.numLowBits == oefs.numLowBits)
|
||||
&& (this.numIndexEntries == oefs.numIndexEntries)
|
||||
&& (this.indexInterval == oefs.indexInterval) // no need to check index content
|
||||
&& Arrays.equals(this.upperLongs, oefs.upperLongs)
|
||||
&& Arrays.equals(this.lowerLongs, oefs.lowerLongs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = ((int) (31*(numValues + 7*(numEncoded + 5*(numLowBits + 3*(numIndexEntries + 11*indexInterval))))))
|
||||
^ Arrays.hashCode(upperLongs)
|
||||
^ Arrays.hashCode(lowerLongs);
|
||||
return h;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED
|
||||
+ RamUsageEstimator.sizeOf(lowerLongs)
|
||||
+ RamUsageEstimator.sizeOf(upperLongs)
|
||||
+ RamUsageEstimator.sizeOf(upperZeroBitPositionIndex);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
public class TestBitUtil extends LuceneTestCase {
|
||||
|
||||
private void tstSelect(long x, int r, int exp) {
|
||||
if ((0 <= exp) && (exp <= 63)) {
|
||||
assertEquals("selectNaive(" + x + "," + r + ")", exp, BitUtil.selectNaive(x, r));
|
||||
assertEquals("select(" + x + "," + r + ")", exp, BitUtil.select(x, r));
|
||||
} else {
|
||||
int act = BitUtil.selectNaive(x, r);
|
||||
assertTrue("selectNaive(" + x + "," + r + ")", act < 0 || act > 63);
|
||||
act = BitUtil.select(x, r);
|
||||
assertTrue("select(" + x + "," + r + ")", act < 0 || act > 63);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSelectFromZero() {
|
||||
tstSelect(0L,1,72);
|
||||
}
|
||||
public void testSelectSingleBit() {
|
||||
for (int i = 0; i < 64; i++) {
|
||||
tstSelect((1L << i),1,i);
|
||||
}
|
||||
}
|
||||
public void testSelectTwoBits() {
|
||||
for (int i = 0; i < 64; i++) {
|
||||
for (int j = i+1; j < 64; j++) {
|
||||
long x = (1L << i) | (1L << j);
|
||||
//System.out.println(getName() + " i: " + i + " j: " + j);
|
||||
tstSelect(x,1,i);
|
||||
tstSelect(x,2,j);
|
||||
tstSelect(x,3,72);
|
||||
}
|
||||
}
|
||||
}
|
||||
public void testSelectThreeBits() {
|
||||
for (int i = 0; i < 64; i++) {
|
||||
for (int j = i+1; j < 64; j++) {
|
||||
for (int k = j+1; k < 64; k++) {
|
||||
long x = (1L << i) | (1L << j) | (1L << k);
|
||||
tstSelect(x,1,i);
|
||||
tstSelect(x,2,j);
|
||||
tstSelect(x,3,k);
|
||||
tstSelect(x,4,72);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
public void testSelectAllBits() {
|
||||
for (int i = 0; i < 64; i++) {
|
||||
tstSelect(0xFFFFFFFFFFFFFFFFL,i+1,i);
|
||||
}
|
||||
}
|
||||
public void testPerfSelectAllBits() {
|
||||
for (int j = 0; j < 100000; j++) { // 1000000 for real perf test
|
||||
for (int i = 0; i < 64; i++) {
|
||||
assertEquals(i, BitUtil.select(0xFFFFFFFFFFFFFFFFL, i+1));
|
||||
}
|
||||
}
|
||||
}
|
||||
public void testPerfSelectAllBitsNaive() {
|
||||
for (int j = 0; j < 10000; j++) { // real perftest: 1000000
|
||||
for (int i = 0; i < 64; i++) {
|
||||
assertEquals(i, BitUtil.selectNaive(0xFFFFFFFFFFFFFFFFL, i+1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BaseDocIdSetTestCase;
|
||||
|
||||
public class TestEliasFanoDocIdSet extends BaseDocIdSetTestCase<EliasFanoDocIdSet> {
|
||||
|
||||
@Override
|
||||
public EliasFanoDocIdSet copyOf(final BitSet bs, final int numBits) throws IOException {
|
||||
final EliasFanoDocIdSet set = new EliasFanoDocIdSet(bs.cardinality(), numBits - 1);
|
||||
set.encodeFromDisi(new DocIdSetIterator() {
|
||||
int doc = -1;
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
doc = bs.nextSetBit(doc + 1);
|
||||
if (doc == -1) {
|
||||
doc = NO_MORE_DOCS;
|
||||
}
|
||||
assert doc < numBits;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return bs.cardinality();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return slowAdvance(target);
|
||||
}
|
||||
});
|
||||
return set;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,382 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestEliasFanoSequence extends LuceneTestCase {
|
||||
|
||||
private static EliasFanoEncoder makeEncoder(long[] values, long indexInterval) {
|
||||
long upperBound = -1L;
|
||||
for (long value: values) {
|
||||
assertTrue(value >= upperBound); // test data ok
|
||||
upperBound = value;
|
||||
}
|
||||
EliasFanoEncoder efEncoder = new EliasFanoEncoder(values.length, upperBound, indexInterval);
|
||||
for (long value: values) {
|
||||
efEncoder.encodeNext(value);
|
||||
}
|
||||
return efEncoder;
|
||||
}
|
||||
|
||||
private static void tstDecodeAllNext(long[] values, EliasFanoDecoder efd) {
|
||||
efd.toBeforeSequence();
|
||||
long nextValue = efd.nextValue();
|
||||
for (long expValue: values) {
|
||||
assertFalse("nextValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == nextValue);
|
||||
assertEquals(expValue, nextValue);
|
||||
nextValue = efd.nextValue();
|
||||
}
|
||||
assertEquals(EliasFanoDecoder.NO_MORE_VALUES, nextValue);
|
||||
}
|
||||
|
||||
private static void tstDecodeAllPrev(long[] values, EliasFanoDecoder efd) {
|
||||
efd.toAfterSequence();
|
||||
for (int i = values.length - 1; i >= 0; i--) {
|
||||
long previousValue = efd.previousValue();
|
||||
assertFalse("previousValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == previousValue);
|
||||
assertEquals(values[i], previousValue);
|
||||
}
|
||||
assertEquals(EliasFanoDecoder.NO_MORE_VALUES, efd.previousValue());
|
||||
}
|
||||
|
||||
private static void tstDecodeAllAdvanceToExpected(long[] values, EliasFanoDecoder efd) {
|
||||
efd.toBeforeSequence();
|
||||
long previousValue = -1L;
|
||||
long index = 0;
|
||||
for (long expValue: values) {
|
||||
if (expValue > previousValue) {
|
||||
long advanceValue = efd.advanceToValue(expValue);
|
||||
assertFalse("advanceValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == advanceValue);
|
||||
assertEquals(expValue, advanceValue);
|
||||
assertEquals(index, efd.currentIndex());
|
||||
previousValue = expValue;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
long advanceValue = efd.advanceToValue(previousValue+1);
|
||||
assertEquals("at end", EliasFanoDecoder.NO_MORE_VALUES, advanceValue);
|
||||
}
|
||||
|
||||
private static void tstDecodeAdvanceToMultiples(long[] values, EliasFanoDecoder efd, final long m) {
|
||||
// test advancing to multiples of m
|
||||
assert m > 0;
|
||||
long previousValue = -1L;
|
||||
long index = 0;
|
||||
long mm = m;
|
||||
efd.toBeforeSequence();
|
||||
for (long expValue: values) {
|
||||
// mm > previousValue
|
||||
if (expValue >= mm) {
|
||||
long advanceValue = efd.advanceToValue(mm);
|
||||
assertFalse("advanceValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == advanceValue);
|
||||
assertEquals(expValue, advanceValue);
|
||||
assertEquals(index, efd.currentIndex());
|
||||
previousValue = expValue;
|
||||
do {
|
||||
mm += m;
|
||||
} while (mm <= previousValue);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
long advanceValue = efd.advanceToValue(mm);
|
||||
assertEquals(EliasFanoDecoder.NO_MORE_VALUES, advanceValue);
|
||||
}
|
||||
|
||||
private static void tstDecodeBackToMultiples(long[] values, EliasFanoDecoder efd, final long m) {
|
||||
// test backing to multiples of m
|
||||
assert m > 0;
|
||||
efd.toAfterSequence();
|
||||
int index = values.length - 1;
|
||||
if (index < 0) {
|
||||
long advanceValue = efd.backToValue(0);
|
||||
assertEquals(EliasFanoDecoder.NO_MORE_VALUES, advanceValue);
|
||||
return; // empty values, nothing to go back to/from
|
||||
}
|
||||
long expValue = values[index];
|
||||
long previousValue = expValue + 1;
|
||||
long mm = (expValue / m) * m;
|
||||
while (index >= 0) {
|
||||
expValue = values[index];
|
||||
assert mm < previousValue;
|
||||
if (expValue <= mm) {
|
||||
long backValue = efd.backToValue(mm);
|
||||
assertFalse("backToValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == backValue);
|
||||
assertEquals(expValue, backValue);
|
||||
assertEquals(index, efd.currentIndex());
|
||||
previousValue = expValue;
|
||||
do {
|
||||
mm -= m;
|
||||
} while (mm >= previousValue);
|
||||
}
|
||||
index--;
|
||||
}
|
||||
long backValue = efd.backToValue(mm);
|
||||
assertEquals(EliasFanoDecoder.NO_MORE_VALUES, backValue);
|
||||
}
|
||||
|
||||
private static void tstEqual(String mes, long[] exp, long[] act) {
|
||||
assertEquals(mes + ".length", exp.length, act.length);
|
||||
for (int i = 0; i < exp.length; i++) {
|
||||
if (exp[i] != act[i]) {
|
||||
fail(mes + "[" + i + "] " + exp[i] + " != " + act[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void tstDecodeAll(EliasFanoEncoder efEncoder, long[] values) {
|
||||
tstDecodeAllNext(values, efEncoder.getDecoder());
|
||||
tstDecodeAllPrev(values, efEncoder.getDecoder());
|
||||
tstDecodeAllAdvanceToExpected(values, efEncoder.getDecoder());
|
||||
}
|
||||
|
||||
private static void tstEFS(long[] values, long[] expHighLongs, long[] expLowLongs) {
|
||||
EliasFanoEncoder efEncoder = makeEncoder(values, EliasFanoEncoder.DEFAULT_INDEX_INTERVAL);
|
||||
tstEqual("upperBits", expHighLongs, efEncoder.getUpperBits());
|
||||
tstEqual("lowerBits", expLowLongs, efEncoder.getLowerBits());
|
||||
tstDecodeAll(efEncoder, values);
|
||||
}
|
||||
|
||||
private static void tstEFS2(long[] values) {
|
||||
EliasFanoEncoder efEncoder = makeEncoder(values, EliasFanoEncoder.DEFAULT_INDEX_INTERVAL);
|
||||
tstDecodeAll(efEncoder, values);
|
||||
}
|
||||
|
||||
private static void tstEFSadvanceToAndBackToMultiples(long[] values, long maxValue, long minAdvanceMultiple) {
|
||||
EliasFanoEncoder efEncoder = makeEncoder(values, EliasFanoEncoder.DEFAULT_INDEX_INTERVAL);
|
||||
for (long m = minAdvanceMultiple; m <= maxValue; m += 1) {
|
||||
tstDecodeAdvanceToMultiples(values, efEncoder.getDecoder(), m);
|
||||
tstDecodeBackToMultiples(values, efEncoder.getDecoder(), m);
|
||||
}
|
||||
}
|
||||
|
||||
private EliasFanoEncoder tstEFVI(long[] values, long indexInterval, long[] expIndexBits) {
|
||||
EliasFanoEncoder efEncVI = makeEncoder(values, indexInterval);
|
||||
tstEqual("upperZeroBitPositionIndex", expIndexBits, efEncVI.getIndexBits());
|
||||
return efEncVI;
|
||||
}
|
||||
|
||||
public void testEmpty() {
|
||||
long[] values = new long[0];
|
||||
long[] expHighBits = new long[0];
|
||||
long[] expLowBits = new long[0];
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testOneValue1() {
|
||||
long[] values = new long[] {0};
|
||||
long[] expHighBits = new long[] {0x1L};
|
||||
long[] expLowBits = new long[] {};
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testTwoValues1() {
|
||||
long[] values = new long[] {0,0};
|
||||
long[] expHighBits = new long[] {0x3L};
|
||||
long[] expLowBits = new long[] {};
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testOneValue2() {
|
||||
long[] values = new long[] {63};
|
||||
long[] expHighBits = new long[] {2};
|
||||
long[] expLowBits = new long[] {31};
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testOneMaxValue() {
|
||||
long[] values = new long[] {Long.MAX_VALUE};
|
||||
long[] expHighBits = new long[] {2};
|
||||
long[] expLowBits = new long[] {Long.MAX_VALUE/2};
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testTwoMinMaxValues() {
|
||||
long[] values = new long[] {0, Long.MAX_VALUE};
|
||||
long[] expHighBits = new long[] {0x11};
|
||||
long[] expLowBits = new long[] {0xE000000000000000L, 0x03FFFFFFFFFFFFFFL};
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testTwoMaxValues() {
|
||||
long[] values = new long[] {Long.MAX_VALUE, Long.MAX_VALUE};
|
||||
long[] expHighBits = new long[] {0x18};
|
||||
long[] expLowBits = new long[] {-1L, 0x03FFFFFFFFFFFFFFL};
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testExample1() { // Figure 1 from Vigna 2012 paper
|
||||
long[] values = new long[] {5,8,8,15,32};
|
||||
long[] expLowBits = new long[] {Long.parseLong("0011000001", 2)}; // reverse block and bit order
|
||||
long[] expHighBits = new long[] {Long.parseLong("1000001011010", 2)}; // reverse block and bit order
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testHashCodeEquals() {
|
||||
long[] values = new long[] {5,8,8,15,32};
|
||||
EliasFanoEncoder efEncoder1 = makeEncoder(values, EliasFanoEncoder.DEFAULT_INDEX_INTERVAL);
|
||||
EliasFanoEncoder efEncoder2 = makeEncoder(values, EliasFanoEncoder.DEFAULT_INDEX_INTERVAL);
|
||||
assertEquals(efEncoder1, efEncoder2);
|
||||
assertEquals(efEncoder1.hashCode(), efEncoder2.hashCode());
|
||||
|
||||
EliasFanoEncoder efEncoder3 = makeEncoder(new long[] {1,2,3}, EliasFanoEncoder.DEFAULT_INDEX_INTERVAL);
|
||||
assertFalse(efEncoder1.equals(efEncoder3));
|
||||
assertFalse(efEncoder3.equals(efEncoder1));
|
||||
assertFalse(efEncoder1.hashCode() == efEncoder3.hashCode()); // implementation ok for these.
|
||||
}
|
||||
|
||||
public void testMonotoneSequences() {
|
||||
//for (int s = 2; s < 1222; s++) {
|
||||
for (int s = 2; s < 4422; s++) {
|
||||
long[] values = new long[s];
|
||||
for (int i = 0; i < s; i++) {
|
||||
values[i] = (i/2); // upperbound smaller than number of values, only upper bits encoded
|
||||
}
|
||||
tstEFS2(values);
|
||||
}
|
||||
}
|
||||
|
||||
public void testStrictMonotoneSequences() {
|
||||
// for (int s = 2; s < 1222; s++) {
|
||||
for (int s = 2; s < 4422; s++) {
|
||||
long[] values = new long[s];
|
||||
for (int i = 0; i < s; i++) {
|
||||
values[i] = i * ((long) i - 1) / 2; // Add a gap of (s-1) to previous
|
||||
// s = (s*(s+1) - (s-1)*s)/2
|
||||
}
|
||||
tstEFS2(values);
|
||||
}
|
||||
}
|
||||
|
||||
public void testHighBitLongZero() {
|
||||
final int s = 65;
|
||||
long[] values = new long[s];
|
||||
for (int i = 0; i < s-1; i++) {
|
||||
values[i] = 0;
|
||||
}
|
||||
values[s-1] = 128;
|
||||
long[] expHighBits = new long[] {-1,0,0,1};
|
||||
long[] expLowBits = new long[0];
|
||||
tstEFS(values, expHighBits, expLowBits);
|
||||
}
|
||||
|
||||
public void testAdvanceToAndBackToMultiples() {
|
||||
for (int s = 2; s < 130; s++) {
|
||||
long[] values = new long[s];
|
||||
for (int i = 0; i < s; i++) {
|
||||
values[i] = i * ((long) i + 1) / 2; // Add a gap of s to previous
|
||||
// s = (s*(s+1) - (s-1)*s)/2
|
||||
}
|
||||
tstEFSadvanceToAndBackToMultiples(values, values[s-1], 10);
|
||||
}
|
||||
}
|
||||
|
||||
public void testEmptyIndex() {
|
||||
long indexInterval = 2;
|
||||
long[] emptyLongs = new long[0];
|
||||
tstEFVI(emptyLongs, indexInterval, emptyLongs);
|
||||
}
|
||||
public void testMaxContentEmptyIndex() {
|
||||
long indexInterval = 2;
|
||||
long[] twoLongs = new long[] {0,1};
|
||||
long[] emptyLongs = new long[0];
|
||||
tstEFVI(twoLongs, indexInterval, emptyLongs);
|
||||
}
|
||||
|
||||
public void testMinContentNonEmptyIndex() {
|
||||
long indexInterval = 2;
|
||||
long[] twoLongs = new long[] {0,2};
|
||||
long[] indexLongs = new long[] {3}; // high bits 1001, index position after zero bit.
|
||||
tstEFVI(twoLongs, indexInterval, indexLongs);
|
||||
}
|
||||
|
||||
public void testIndexAdvanceToLast() {
|
||||
long indexInterval = 2;
|
||||
long[] twoLongs = new long[] {0,2};
|
||||
long[] indexLongs = new long[] {3}; // high bits 1001
|
||||
EliasFanoEncoder efEncVI = tstEFVI(twoLongs, indexInterval, indexLongs);
|
||||
assertEquals(2, efEncVI.getDecoder().advanceToValue(2));
|
||||
}
|
||||
|
||||
public void testIndexAdvanceToAfterLast() {
|
||||
long indexInterval = 2;
|
||||
long[] twoLongs = new long[] {0,2};
|
||||
long[] indexLongs = new long[] {3}; // high bits 1001
|
||||
EliasFanoEncoder efEncVI = tstEFVI(twoLongs, indexInterval, indexLongs);
|
||||
assertEquals(EliasFanoDecoder.NO_MORE_VALUES, efEncVI.getDecoder().advanceToValue(3));
|
||||
}
|
||||
|
||||
public void testIndexAdvanceToFirst() {
|
||||
long indexInterval = 2;
|
||||
long[] twoLongs = new long[] {0,2};
|
||||
long[] indexLongs = new long[] {3}; // high bits 1001
|
||||
EliasFanoEncoder efEncVI = tstEFVI(twoLongs, indexInterval, indexLongs);
|
||||
assertEquals(0, efEncVI.getDecoder().advanceToValue(0));
|
||||
}
|
||||
|
||||
public void testTwoIndexEntries() {
|
||||
long indexInterval = 2;
|
||||
long[] twoLongs = new long[] {0,1,2,3,4,5};
|
||||
long[] indexLongs = new long[] {4 + 8*16}; // high bits 0b10101010101
|
||||
EliasFanoEncoder efEncVI = tstEFVI(twoLongs, indexInterval, indexLongs);
|
||||
EliasFanoDecoder efDecVI = efEncVI.getDecoder();
|
||||
assertEquals("advance 0", 0, efDecVI.advanceToValue(0));
|
||||
assertEquals("advance 5", 5, efDecVI.advanceToValue(5));
|
||||
assertEquals("advance 6", EliasFanoDecoder.NO_MORE_VALUES, efDecVI.advanceToValue(5));
|
||||
}
|
||||
|
||||
public void testExample2a() { // Figure 2 from Vigna 2012 paper
|
||||
long indexInterval = 4;
|
||||
long[] values = new long[] {5,8,8,15,32}; // two low bits, high values 1,2,2,3,8.
|
||||
long[] indexLongs = new long[] {8 + 12*16}; // high bits 0b 0001 0000 0101 1010
|
||||
EliasFanoEncoder efEncVI = tstEFVI(values, indexInterval, indexLongs);
|
||||
EliasFanoDecoder efDecVI = efEncVI.getDecoder();
|
||||
assertEquals("advance 22", 32, efDecVI.advanceToValue(22));
|
||||
}
|
||||
|
||||
public void testExample2b() { // Figure 2 from Vigna 2012 paper
|
||||
long indexInterval = 4;
|
||||
long[] values = new long[] {5,8,8,15,32}; // two low bits, high values 1,2,2,3,8.
|
||||
long[] indexLongs = new long[] {8 + 12*16}; // high bits 0b 0001 0000 0101 1010
|
||||
EliasFanoEncoder efEncVI = tstEFVI(values, indexInterval, indexLongs);
|
||||
EliasFanoDecoder efDecVI = efEncVI.getDecoder();
|
||||
assertEquals("initial next", 5, efDecVI.nextValue());
|
||||
assertEquals("advance 22", 32, efDecVI.advanceToValue(22));
|
||||
}
|
||||
|
||||
public void testExample2NoIndex1() { // Figure 2 from Vigna 2012 paper, no index, test broadword selection.
|
||||
long indexInterval = 16;
|
||||
long[] values = new long[] {5,8,8,15,32}; // two low bits, high values 1,2,2,3,8.
|
||||
long[] indexLongs = new long[0]; // high bits 0b 0001 0000 0101 1010
|
||||
EliasFanoEncoder efEncVI = tstEFVI(values, indexInterval, indexLongs);
|
||||
EliasFanoDecoder efDecVI = efEncVI.getDecoder();
|
||||
assertEquals("advance 22", 32, efDecVI.advanceToValue(22));
|
||||
}
|
||||
|
||||
public void testExample2NoIndex2() { // Figure 2 from Vigna 2012 paper, no index, test broadword selection.
|
||||
long indexInterval = 16;
|
||||
long[] values = new long[] {5,8,8,15,32}; // two low bits, high values 1,2,2,3,8.
|
||||
long[] indexLongs = new long[0]; // high bits 0b 0001 0000 0101 1010
|
||||
EliasFanoEncoder efEncVI = tstEFVI(values, indexInterval, indexLongs);
|
||||
EliasFanoDecoder efDecVI = efEncVI.getDecoder();
|
||||
assertEquals("initial next", 5, efDecVI.nextValue());
|
||||
assertEquals("advance 22", 32, efDecVI.advanceToValue(22));
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue