LUCENE-4161: Make packed int arrays usable by codecs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1357159 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2012-07-04 08:17:02 +00:00
parent 9250082566
commit e96b143b6a
23 changed files with 9566 additions and 745 deletions

View File

@ -315,10 +315,10 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
try {
// Subsample the index terms
clone1.seek(packedIndexStart);
final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1);
final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1, PackedInts.DEFAULT_BUFFER_SIZE);
clone2.seek(packedOffsetsStart);
final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2);
final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2, PackedInts.DEFAULT_BUFFER_SIZE);
// TODO: often we can get by w/ fewer bits per
// value, below.. .but this'd be more complex:

View File

@ -121,7 +121,7 @@ class VarStraightBytesImpl {
final IndexInput cloneIdx = reader.cloneIndex();
try {
numDataBytes = cloneIdx.readVLong();
final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx);
final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx, PackedInts.DEFAULT_BUFFER_SIZE);
for (int i = 0; i < maxDocs; i++) {
long offset = iter.next();
++lastDocID;

View File

@ -0,0 +1,167 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Represents long[], as a slice (offset + length) into an
 * existing long[]. The {@link #longs} member should never be null; use
 * {@link #EMPTY_LONGS} if necessary.
 *
 * @lucene.internal
 */
public final class LongsRef implements Comparable<LongsRef>, Cloneable {

  /** Shared zero-length array used by the no-argument constructor. */
  public static final long[] EMPTY_LONGS = new long[0];

  /** Backing array; the slice covers {@code longs[offset .. offset + length)}. */
  public long[] longs;
  /** Index of the first element of the slice inside {@link #longs}. */
  public int offset;
  /** Number of elements in the slice. */
  public int length;

  /** Creates an empty slice backed by {@link #EMPTY_LONGS}. */
  public LongsRef() {
    longs = EMPTY_LONGS;
  }

  /**
   * Creates a slice with a freshly allocated backing array of the given
   * capacity; {@link #offset} and {@link #length} are left at zero.
   */
  public LongsRef(int capacity) {
    longs = new long[capacity];
  }

  /**
   * Creates a slice over an existing array. The array is used directly
   * (it is not copied).
   */
  public LongsRef(long[] longs, int offset, int length) {
    assert longs != null;
    assert offset >= 0;
    assert length >= 0;
    assert longs.length >= offset + length;
    this.longs = longs;
    this.offset = offset;
    this.length = length;
  }

  /**
   * Returns a new reference with the same offset and length that shares
   * this instance's backing array. Use {@link #deepCopyOf} to copy the
   * values themselves.
   */
  @Override
  public LongsRef clone() {
    return new LongsRef(longs, offset, length);
  }

  /** Hashes the values of the slice only; offset and backing array identity do not matter. */
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 0;
    final int end = offset + length;
    for (int i = offset; i < end; i++) {
      // fold the upper and lower 32 bits of each value together
      result = prime * result + (int) (longs[i] ^ (longs[i] >>> 32));
    }
    return result;
  }

  /** Two references are equal when their slices hold the same values in the same order. */
  @Override
  public boolean equals(Object other) {
    // instanceof is false for null, so no separate null check is needed
    return other instanceof LongsRef && longsEquals((LongsRef) other);
  }

  /** Returns true if both slices have the same length and the same values. */
  public boolean longsEquals(LongsRef other) {
    if (length != other.length) {
      return false;
    }
    final long[] otherLongs = other.longs;
    final int end = offset + length;
    for (int upto = offset, otherUpto = other.offset; upto < end; upto++, otherUpto++) {
      if (longs[upto] != otherLongs[otherUpto]) {
        return false;
      }
    }
    return true;
  }

  /** Signed long order comparison; when one slice is a prefix of the other, the shorter one sorts first. */
  @Override
  public int compareTo(LongsRef other) {
    if (this == other) {
      return 0;
    }

    final long[] aLongs = this.longs;
    int aUpto = this.offset;
    final long[] bLongs = other.longs;
    int bUpto = other.offset;

    final int aStop = aUpto + Math.min(this.length, other.length);

    while (aUpto < aStop) {
      final long aLong = aLongs[aUpto++];
      final long bLong = bLongs[bUpto++];
      if (aLong > bLong) {
        return 1;
      } else if (aLong < bLong) {
        return -1;
      }
    }

    // One is a prefix of the other, or, they are equal:
    return this.length - other.length;
  }

  /**
   * Copies the values of <code>other</code> into this reference. The current
   * backing array is reused when it has room after {@link #offset};
   * otherwise a new array of exactly other.length is allocated and the
   * offset is reset to zero.
   */
  public void copyLongs(LongsRef other) {
    if (longs.length - offset < other.length) {
      longs = new long[other.length];
      offset = 0;
    }
    System.arraycopy(other.longs, other.offset, longs, offset, other.length);
    length = other.length;
  }

  /**
   * Used to grow the reference array.
   *
   * In general this should not be used as it does not take the offset into account.
   * @lucene.internal */
  public void grow(int newLength) {
    assert offset == 0;
    if (longs.length < newLength) {
      longs = ArrayUtil.grow(longs, newLength);
    }
  }

  /** Renders the slice as space-separated hexadecimal values inside square brackets. */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append('[');
    final int end = offset + length;
    for (int i = offset; i < end; i++) {
      if (i > offset) {
        sb.append(' ');
      }
      sb.append(Long.toHexString(longs[i]));
    }
    sb.append(']');
    return sb.toString();
  }

  /**
   * Creates a new LongsRef that points to a copy of the longs from
   * <code>other</code>
   * <p>
   * The returned LongsRef will have a length of other.length
   * and an offset of zero.
   */
  public static LongsRef deepCopyOf(LongsRef other) {
    LongsRef clone = new LongsRef();
    clone.copyLongs(other);
    return clone;
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,5 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
@ -24,62 +26,37 @@ import java.io.IOException;
import java.util.Arrays;
/**
* Direct wrapping of 16 bit values to a backing array of shorts.
* Direct wrapping of 16-bits values to a backing array.
* @lucene.internal
*/
final class Direct16 extends PackedInts.MutableImpl {
final short[] values;
class Direct16 extends PackedInts.MutableImpl {
private final short[] values;
private static final int BITS_PER_VALUE = 16;
public Direct16(int valueCount) {
super(valueCount, BITS_PER_VALUE);
Direct16(int valueCount) {
super(valueCount, 16);
values = new short[valueCount];
}
public Direct16(DataInput in, int valueCount) throws IOException {
super(valueCount, BITS_PER_VALUE);
short[] values = new short[valueCount];
for(int i=0;i<valueCount;i++) {
Direct16(DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readShort();
}
final int mod = valueCount % 4;
if (mod != 0) {
final int pad = 4-mod;
// round out long
for(int i=0;i<pad;i++) {
for (int i = mod; i < 4; ++i) {
in.readShort();
}
}
this.values = values;
}
/**
* Creates an array backed by the given values.
* </p><p>
* Note: The values are used directly, so changes to the values will
* affect the structure.
* @param values used as the internal backing array.
*/
public Direct16(short[] values) {
super(values.length, BITS_PER_VALUE);
this.values = values;
}
public long get(final int index) {
assert index >= 0 && index < size();
return 0xFFFFL & values[index];
}
public void set(final int index, final long value) {
values[index] = (short)(value & 0xFFFF);
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert (val & 0xffffL) == val;
Arrays.fill(values, fromIndex, toIndex, (short) val);
public long get(final int index) {
return values[index] & 0xFFFFL;
}
public void set(final int index, final long value) {
values[index] = (short) (value);
}
public long ramBytesUsed() {
@ -87,7 +64,7 @@ class Direct16 extends PackedInts.MutableImpl {
}
public void clear() {
Arrays.fill(values, (short)0);
Arrays.fill(values, (short) 0L);
}
@Override
@ -99,4 +76,35 @@ class Direct16 extends PackedInts.MutableImpl {
public boolean hasArray() {
return true;
}
/**
 * Bulk read: copies at most <code>len</code> values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>.
 * Each stored short is widened as an unsigned 16-bit value.
 *
 * @return the number of values copied, i.e. min(valueCount - index, len)
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int available = valueCount - index;
  final int count = available < len ? available : len;
  for (int k = 0; k < count; k++) {
    arr[off + k] = values[index + k] & 0xFFFFL;
  }
  return count;
}
/**
 * Bulk write: stores at most <code>len</code> values taken from
 * <code>arr</code> (starting at <code>off</code>) into this structure,
 * starting at <code>index</code>. Each value is narrowed to a short, so
 * only its low 16 bits are kept.
 *
 * @return the number of values stored, i.e. min(valueCount - index, len)
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int sets = Math.min(valueCount - index, len);
  for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
    values[i] = (short) arr[o];
  }
  return sets;
}
/**
 * Sets every value in <code>[fromIndex, toIndex)</code> to <code>val</code>,
 * which must fit in 16 unsigned bits.
 */
@Override
public void fill(int fromIndex, int toIndex, long val) {
  assert (val & 0xFFFFL) == val;
  final short narrowed = (short) val;
  Arrays.fill(values, fromIndex, toIndex, narrowed);
}
}

View File

@ -1,3 +1,5 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
@ -24,58 +26,37 @@ import java.io.IOException;
import java.util.Arrays;
/**
* Direct wrapping of 32 bit values to a backing array of ints.
* Direct wrapping of 32-bits values to a backing array.
* @lucene.internal
*/
final class Direct32 extends PackedInts.MutableImpl {
final int[] values;
class Direct32 extends PackedInts.MutableImpl {
private final int[] values;
private static final int BITS_PER_VALUE = 32;
public Direct32(int valueCount) {
super(valueCount, BITS_PER_VALUE);
Direct32(int valueCount) {
super(valueCount, 32);
values = new int[valueCount];
}
public Direct32(DataInput in, int valueCount) throws IOException {
super(valueCount, BITS_PER_VALUE);
int[] values = new int[valueCount];
for(int i=0;i<valueCount;i++) {
Direct32(DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readInt();
}
final int mod = valueCount % 2;
if (mod != 0) {
in.readInt();
for (int i = mod; i < 2; ++i) {
in.readInt();
}
}
this.values = values;
}
/**
* Creates an array backed by the given values.
* </p><p>
* Note: The values are used directly, so changes to the given values will
* affect the structure.
* @param values used as the internal backing array.
*/
public Direct32(int[] values) {
super(values.length, BITS_PER_VALUE);
this.values = values;
}
public long get(final int index) {
assert index >= 0 && index < size();
return 0xFFFFFFFFL & values[index];
}
public void set(final int index, final long value) {
values[index] = (int)(value & 0xFFFFFFFF);
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert (val & 0xffffffffL) == val;
Arrays.fill(values, fromIndex, toIndex, (int) val);
public long get(final int index) {
return values[index] & 0xFFFFFFFFL;
}
public void set(final int index, final long value) {
values[index] = (int) (value);
}
public long ramBytesUsed() {
@ -83,11 +64,11 @@ class Direct32 extends PackedInts.MutableImpl {
}
public void clear() {
Arrays.fill(values, 0);
Arrays.fill(values, (int) 0L);
}
@Override
public int[] getArray() {
public Object getArray() {
return values;
}
@ -95,4 +76,35 @@ class Direct32 extends PackedInts.MutableImpl {
public boolean hasArray() {
return true;
}
/**
 * Bulk read: copies at most <code>len</code> values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>.
 * Each stored int is widened as an unsigned 32-bit value.
 *
 * @return the number of values copied, i.e. min(valueCount - index, len)
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int available = valueCount - index;
  final int count = available < len ? available : len;
  for (int k = 0; k < count; k++) {
    arr[off + k] = values[index + k] & 0xFFFFFFFFL;
  }
  return count;
}
/**
 * Bulk write: stores at most <code>len</code> values taken from
 * <code>arr</code> (starting at <code>off</code>) into this structure,
 * starting at <code>index</code>. Each value is narrowed to an int, so
 * only its low 32 bits are kept.
 *
 * @return the number of values stored, i.e. min(valueCount - index, len)
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int sets = Math.min(valueCount - index, len);
  for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
    values[i] = (int) arr[o];
  }
  return sets;
}
/**
 * Sets every value in <code>[fromIndex, toIndex)</code> to <code>val</code>,
 * which must fit in 32 unsigned bits.
 */
@Override
public void fill(int fromIndex, int toIndex, long val) {
  assert (val & 0xFFFFFFFFL) == val;
  final int narrowed = (int) val;
  Arrays.fill(values, fromIndex, toIndex, narrowed);
}
}

View File

@ -1,3 +1,5 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
@ -24,70 +26,31 @@ import java.io.IOException;
import java.util.Arrays;
/**
* Direct wrapping of 64 bit values to a backing array of longs.
* Direct wrapping of 64-bits values to a backing array.
* @lucene.internal
*/
class Direct64 extends PackedInts.MutableImpl {
private final long[] values;
private static final int BITS_PER_VALUE = 64;
final class Direct64 extends PackedInts.MutableImpl {
final long[] values;
public Direct64(int valueCount) {
super(valueCount, BITS_PER_VALUE);
Direct64(int valueCount) {
super(valueCount, 64);
values = new long[valueCount];
}
public Direct64(DataInput in, int valueCount) throws IOException {
super(valueCount, BITS_PER_VALUE);
long[] values = new long[valueCount];
for(int i=0;i<valueCount;i++) {
Direct64(DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readLong();
}
this.values = values;
}
/**
* Creates an array backed by the given values.
* </p><p>
* Note: The values are used directly, so changes to the given values will
* affect the structure.
* @param values used as the internal backing array.
*/
public Direct64(long[] values) {
super(values.length, BITS_PER_VALUE);
this.values = values;
}
@Override
public long get(final int index) {
assert index >= 0 && index < size();
return values[index];
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
System.arraycopy(values, index, arr, off, gets);
return gets;
}
public void set(final int index, final long value) {
values[index] = value;
}
@Override
public int set(int index, long[] arr, int off, int len) {
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
System.arraycopy(arr, off, values, index, sets);
return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
Arrays.fill(values, fromIndex, toIndex, val);
values[index] = (value);
}
public long ramBytesUsed() {
@ -99,7 +62,7 @@ class Direct64 extends PackedInts.MutableImpl {
}
@Override
public long[] getArray() {
public Object getArray() {
return values;
}
@ -108,4 +71,29 @@ class Direct64 extends PackedInts.MutableImpl {
return true;
}
/**
 * Bulk read: copies at most <code>len</code> values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>
 * with a single array copy.
 *
 * @return the number of values copied, i.e. min(valueCount - index, len)
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int available = valueCount - index;
  final int count = available < len ? available : len;
  System.arraycopy(values, index, arr, off, count);
  return count;
}
/**
 * Bulk write: copies at most <code>len</code> values from <code>arr</code>
 * (starting at <code>off</code>) into this structure, starting at
 * <code>index</code>, with a single array copy.
 *
 * @return the number of values stored, i.e. min(valueCount - index, len)
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int sets = Math.min(valueCount - index, len);
  System.arraycopy(arr, off, values, index, sets);
  return sets;
}
/**
 * Sets every value in <code>[fromIndex, toIndex)</code> to <code>val</code>.
 * No range check on <code>val</code> is needed: the backing array stores
 * full longs.
 */
@Override
public void fill(int fromIndex, int toIndex, long val) {
  final long[] target = values;
  Arrays.fill(target, fromIndex, toIndex, val);
}
}

View File

@ -1,3 +1,5 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
@ -24,63 +26,37 @@ import java.io.IOException;
import java.util.Arrays;
/**
* Direct wrapping of 8 bit values to a backing array of bytes.
* Direct wrapping of 8-bits values to a backing array.
* @lucene.internal
*/
final class Direct8 extends PackedInts.MutableImpl {
final byte[] values;
class Direct8 extends PackedInts.MutableImpl {
private final byte[] values;
private static final int BITS_PER_VALUE = 8;
public Direct8(int valueCount) {
super(valueCount, BITS_PER_VALUE);
Direct8(int valueCount) {
super(valueCount, 8);
values = new byte[valueCount];
}
public Direct8(DataInput in, int valueCount)
throws IOException {
super(valueCount, BITS_PER_VALUE);
byte[] values = new byte[valueCount];
for(int i=0;i<valueCount;i++) {
Direct8(DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readByte();
}
final int mod = valueCount % 8;
if (mod != 0) {
final int pad = 8-mod;
// round out long
for(int i=0;i<pad;i++) {
for (int i = mod; i < 8; ++i) {
in.readByte();
}
}
this.values = values;
}
/**
* Creates an array backed by the given values.
* </p><p>
* Note: The values are used directly, so changes to the given values will
* affect the structure.
* @param values used as the internal backing array.
*/
public Direct8(byte[] values) {
super(values.length, BITS_PER_VALUE);
this.values = values;
}
public long get(final int index) {
assert index >= 0 && index < size();
return 0xFFL & values[index];
}
public void set(final int index, final long value) {
values[index] = (byte)(value & 0xFF);
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert (val & 0xffL) == val;
Arrays.fill(values, fromIndex, toIndex, (byte) val);
public long get(final int index) {
return values[index] & 0xFFL;
}
public void set(final int index, final long value) {
values[index] = (byte) (value);
}
public long ramBytesUsed() {
@ -88,7 +64,7 @@ class Direct8 extends PackedInts.MutableImpl {
}
public void clear() {
Arrays.fill(values, (byte)0);
Arrays.fill(values, (byte) 0L);
}
@Override
@ -100,4 +76,35 @@ class Direct8 extends PackedInts.MutableImpl {
public boolean hasArray() {
return true;
}
/**
 * Bulk read: copies at most <code>len</code> values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>.
 * Each stored byte is widened as an unsigned 8-bit value.
 *
 * @return the number of values copied, i.e. min(valueCount - index, len)
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int available = valueCount - index;
  final int count = available < len ? available : len;
  for (int k = 0; k < count; k++) {
    arr[off + k] = values[index + k] & 0xFFL;
  }
  return count;
}
/**
 * Bulk write: stores at most <code>len</code> values taken from
 * <code>arr</code> (starting at <code>off</code>) into this structure,
 * starting at <code>index</code>. Each value is narrowed to a byte, so
 * only its low 8 bits are kept.
 *
 * @return the number of values stored, i.e. min(valueCount - index, len)
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int sets = Math.min(valueCount - index, len);
  for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
    values[i] = (byte) arr[o];
  }
  return sets;
}
/**
 * Sets every value in <code>[fromIndex, toIndex)</code> to <code>val</code>,
 * which must fit in 8 unsigned bits.
 */
@Override
public void fill(int fromIndex, int toIndex, long val) {
  assert (val & 0xFFL) == val;
  final byte narrowed = (byte) val;
  Arrays.fill(values, fromIndex, toIndex, narrowed);
}
}

View File

@ -1,11 +1,7 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -23,32 +19,38 @@ import org.apache.lucene.util.RamUsageEstimator;
* limitations under the License.
*/
/** 48 bitsPerValue backed by short[] */
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
/**
* Packs integers into 3 shorts (48 bits per value).
* @lucene.internal
*/
final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
final short[] blocks;
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
private final short[] blocks;
Packed16ThreeBlocks(int valueCount) {
super(valueCount, 48);
if (valueCount > MAX_SIZE) {
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
}
this.blocks = new short[3 * valueCount];
blocks = new short[valueCount * 3];
}
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < blocks.length; i++) {
for (int i = 0; i < 3 * valueCount; ++i) {
blocks[i] = in.readShort();
}
final int mod = blocks.length % 4;
if (mod != 0) {
final int pad = 4 - mod;
// round out long
for (int i = 0; i < pad; i++) {
in.readShort();
for (int i = mod; i < 4; ++i) {
in.readShort();
}
}
}
@ -56,26 +58,55 @@ final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
@Override
public long get(int index) {
final int o = index * 3;
return (blocks[o] & 0xffffL) << 32 | (blocks[o+1] & 0xffffL) << 16 | (blocks[o+2] & 0xffffL);
return (blocks[o] & 0xFFFFL) << 32 | (blocks[o+1] & 0xFFFFL) << 16 | (blocks[o+2] & 0xFFFFL);
}
/**
 * Bulk read: decodes at most <code>len</code> 48-bit values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>.
 * Every value is rebuilt from three consecutive shorts, most significant
 * first.
 *
 * @return the number of values decoded, i.e. min(valueCount - index, len)
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(valueCount - index, len);
  int src = index * 3;
  for (int dst = off, stop = off + count; dst < stop; dst++) {
    final long high = blocks[src] & 0xFFFFL;
    final long middle = blocks[src + 1] & 0xFFFFL;
    final long low = blocks[src + 2] & 0xFFFFL;
    arr[dst] = high << 32 | middle << 16 | low;
    src += 3;
  }
  return count;
}
@Override
public void set(int index, long value) {
final int o = index * 3;
blocks[o] = (short) (value >> 32);
blocks[o+1] = (short) (value >> 16);
blocks[o] = (short) (value >>> 32);
blocks[o+1] = (short) (value >>> 16);
blocks[o+2] = (short) value;
}
/**
 * Bulk write: encodes at most <code>len</code> values from <code>arr</code>
 * (starting at <code>off</code>) into this structure, starting at
 * <code>index</code>. Every value is split into three consecutive shorts,
 * most significant first; bits above the low 48 are discarded.
 *
 * @return the number of values encoded, i.e. min(valueCount - index, len)
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(valueCount - index, len);
  int dst = index * 3;
  for (int k = 0; k < count; k++) {
    final long v = arr[off + k];
    blocks[dst] = (short) (v >>> 32);
    blocks[dst + 1] = (short) (v >>> 16);
    blocks[dst + 2] = (short) v;
    dst += 3;
  }
  return count;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
short block1 = (short) (val >> 32);
short block2 = (short) (val >> 16);
short block3 = (short) val;
for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) {
blocks[i++] = block1;
blocks[i++] = block2;
blocks[i++] = block3;
final short block1 = (short) (val >>> 32);
final short block2 = (short) (val >>> 16);
final short block3 = (short) val;
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
blocks[i] = block1;
blocks[i+1] = block2;
blocks[i+2] = block3;
}
}

View File

@ -138,6 +138,49 @@ class Packed64 extends PackedInts.MutableImpl {
& maskRight;
}
/**
 * Bulk read of at most <code>len</code> values starting at <code>index</code>
 * into <code>arr</code> from position <code>off</code>.
 * <p>
 * Values are first read one at a time until <code>index</code> is a multiple
 * of <code>op.values()</code>; then as many whole bulk iterations as fit in
 * the remaining length are decoded through the {@link BulkOperation}. The
 * call stops at that boundary, so it may return fewer than <code>len</code>
 * values even when more are available. When no progress could be made this
 * way, the request is delegated to the superclass implementation.
 *
 * @return the number of values actually read
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  len = Math.min(len, valueCount - index);
  assert off + len <= arr.length;

  final int originalIndex = index;
  final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);

  // go to the next block where the value does not span across two blocks
  final int offsetInBlocks = index % op.values();
  if (offsetInBlocks != 0) {
    for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
      arr[off++] = get(index++);
      --len;
    }
    if (len == 0) {
      return index - originalIndex;
    }
  }

  // bulk get
  assert index % op.values() == 0;
  // Shift the 64-bit bit position BEFORE narrowing to int: a cast binds
  // tighter than >>>, so casting first would truncate the bit position to
  // 32 bits and yield a wrong block index for large structures.
  final int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
  assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
  final int iterations = len / op.values();
  op.get(blocks, blockIndex, arr, off, iterations);
  final int gotValues = iterations * op.values();
  index += gotValues;
  len -= gotValues;
  assert len >= 0;

  if (index > originalIndex) {
    // stay at the block boundary
    return index - originalIndex;
  } else {
    // no progress so far => already at a block boundary but no full block to get
    assert index == originalIndex;
    return super.get(index, arr, off, len);
  }
}
@Override
public void set(final int index, final long value) {
// The abstract index in a contiguous bit stream
@ -159,6 +202,48 @@ class Packed64 extends PackedInts.MutableImpl {
| (value << (BLOCK_SIZE - endBits));
}
/**
 * Bulk write of at most <code>len</code> values from <code>arr</code>
 * (starting at <code>off</code>) into this structure, starting at
 * <code>index</code>.
 * <p>
 * Values are first written one at a time until <code>index</code> is a
 * multiple of <code>op.values()</code>; then as many whole bulk iterations as
 * fit in the remaining length are encoded through the {@link BulkOperation}.
 * The call stops at that boundary, so it may store fewer than
 * <code>len</code> values even when more were supplied. When no progress
 * could be made this way, the request is delegated to the superclass
 * implementation.
 *
 * @return the number of values actually written
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  len = Math.min(len, valueCount - index);
  assert off + len <= arr.length;

  final int originalIndex = index;
  final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);

  // go to the next block where the value does not span across two blocks
  final int offsetInBlocks = index % op.values();
  if (offsetInBlocks != 0) {
    for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
      set(index++, arr[off++]);
      --len;
    }
    if (len == 0) {
      return index - originalIndex;
    }
  }

  // bulk set
  assert index % op.values() == 0;
  // Shift the 64-bit bit position BEFORE narrowing to int: a cast binds
  // tighter than >>>, so casting first would truncate the bit position to
  // 32 bits and yield a wrong block index for large structures.
  final int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
  assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
  final int iterations = len / op.values();
  op.set(blocks, blockIndex, arr, off, iterations);
  final int setValues = iterations * op.values();
  index += setValues;
  len -= setValues;
  assert len >= 0;

  if (index > originalIndex) {
    // stay at the block boundary
    return index - originalIndex;
  } else {
    // no progress so far => already at a block boundary but no full block to set
    assert index == originalIndex;
    return super.set(index, arr, off, len);
  }
}
@Override
public String toString() {

View File

@ -39,12 +39,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
}
public static float overheadPerValue(int bitsPerValue) {
int valuesPerBlock = 64 / bitsPerValue;
int overhead = 64 % bitsPerValue;
return (float) overhead / valuesPerBlock;
}
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
return valueCount / valuesPerBlock
+ (valueCount % valuesPerBlock == 0 ? 0 : 1);
@ -92,16 +86,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk get
assert index % valuesPerBlock == 0;
final long readMask = (1L << bitsPerValue) - 1;
final int startBlock = index / valuesPerBlock;
final int endBlock = (index + len) / valuesPerBlock;
final int diff = (endBlock - startBlock) * valuesPerBlock;
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert op.blocks() == 1;
assert op.values() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.get(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff;
for (int block = startBlock; block < endBlock; ++block) {
for (int i = 0; i < valuesPerBlock; ++i) {
arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
}
}
if (index > originalIndex) {
// stay at the block boundary
@ -138,17 +130,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk set
assert index % valuesPerBlock == 0;
final int startBlock = index / valuesPerBlock;
final int endBlock = (index + len) / valuesPerBlock;
final int diff = (endBlock - startBlock) * valuesPerBlock;
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert op.blocks() == 1;
assert op.values() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.set(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff;
for (int block = startBlock; block < endBlock; ++block) {
long next = 0L;
for (int i = 0; i < valuesPerBlock; ++i) {
next |= (arr[off++] << (i * bitsPerValue));
}
blocks[block] = next;
}
if (index > originalIndex) {
// stay at the block boundary
@ -202,8 +191,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
}
@Override
protected int getFormat() {
return PackedInts.PACKED_SINGLE_BLOCK;
protected PackedInts.Format getFormat() {
return PackedInts.Format.PACKED_SINGLE_BLOCK;
}
@Override

View File

@ -1,87 +0,0 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.packed.PackedInts.ReaderIteratorImpl;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Sequential reader over values stored so that no value spans two 64-bit
 * blocks: each long read from the input holds 64 / bitsPerValue values,
 * lowest bits first.
 */
final class Packed64SingleBlockReaderIterator extends ReaderIteratorImpl {

  /** The block currently being decoded. */
  private long pending = 0;
  /** Bit offset of the next value inside {@link #pending}; 64 forces a new block to be read. */
  private int shift = 64;
  /** Mask selecting the low bitsPerValue bits. */
  private final long mask;
  /** Ordinal of the value most recently returned, or -1 before the first call. */
  private int position = -1;

  Packed64SingleBlockReaderIterator(int valueCount, int bitsPerValue, IndexInput in) {
    super(valueCount, bitsPerValue, in);
    mask = ~(~0L << bitsPerValue);
  }

  /** Returns the next value, reading a new block from the input when the current one cannot hold another value. */
  @Override
  public long next() throws IOException {
    final boolean blockExhausted = shift + bitsPerValue > 64;
    if (blockExhausted) {
      pending = in.readLong();
      shift = 0;
    }
    final long value = (pending >>> shift) & mask;
    shift += bitsPerValue;
    ++position;
    return value;
  }

  @Override
  public int ord() {
    return position;
  }

  /**
   * Moves forward to <code>ord</code> and returns the value stored there.
   * Whole blocks lying between the current position and the target are
   * skipped with a seek; the remaining values are consumed one by one.
   */
  @Override
  public long advance(int ord) throws IOException {
    assert ord < valueCount : "ord must be less than valueCount";
    assert ord > position : "ord must be greater than the current position";

    final int valuesPerBlock = 64 / bitsPerValue;
    // index of the first block that has not been read from the input yet
    final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
    final long targetBlock = ord / valuesPerBlock;
    final long blocksToSkip = targetBlock - nextBlock;

    if (blocksToSkip <= 0) {
      // target lives in the current or the very next block: walk up to it
      int remaining = ord - 1 - position;
      while (remaining-- > 0) {
        next();
      }
    } else {
      // each block is one long, i.e. 8 bytes
      final long skip = blocksToSkip << 3;
      in.seek(in.getFilePointer() + skip);
      shift = 64; // force next() to load the target block
      int remaining = ord % valuesPerBlock;
      while (remaining-- > 0) {
        next();
      }
    }

    position = ord - 1;
    return next();
  }
}

View File

@ -1,81 +0,0 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.packed.PackedInts.Writer;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * A {@link Writer} for {@link Packed64SingleBlock} readers. Values are
 * accumulated into a 64-bit block, lowest bits first, and the block is
 * written out as soon as it cannot hold another value.
 */
final class Packed64SingleBlockWriter extends Writer {

  /** Block being filled; starts at zero. */
  private long pending;
  /** Bit offset at which the next value is placed inside {@link #pending}. */
  private int shift;
  /** Number of values added so far. */
  private int written;

  Packed64SingleBlockWriter(DataOutput out, int valueCount,
      int bitsPerValue) throws IOException {
    super(out, valueCount, bitsPerValue);
    assert Packed64SingleBlock.isSupported(bitsPerValue) : bitsPerValue + " is not supported";
    // pending, shift and written keep their default value of zero
  }

  @Override
  protected int getFormat() {
    return PackedInts.PACKED_SINGLE_BLOCK;
  }

  /** Appends one value, flushing the current block first when the value would not fit. */
  @Override
  public void add(long v) throws IOException {
    assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
        + " maxValue=" + PackedInts.maxValue(bitsPerValue);
    assert v >= 0;

    final boolean blockFull = shift + bitsPerValue > Long.SIZE;
    if (blockFull) {
      out.writeLong(pending);
      pending = 0;
      shift = 0;
    }
    pending |= v << shift;
    shift += bitsPerValue;
    ++written;
  }

  /** Pads with zeros up to valueCount values, then writes the last, possibly partial, block. */
  @Override
  public void finish() throws IOException {
    for (; written < valueCount; ) {
      add(0L); // Auto flush
    }
    if (shift <= 0) {
      // nothing was ever added, so there is no block to write
      return;
    }
    out.writeLong(pending);
  }

  @Override
  public String toString() {
    final String progress = written + "/" + valueCount;
    return "Packed64SingleBlockWriter(written " + progress + " with "
        + bitsPerValue + " bits/value)";
  }
}

View File

@ -1,11 +1,7 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -23,32 +19,38 @@ import org.apache.lucene.util.RamUsageEstimator;
* limitations under the License.
*/
/** 24 bitsPerValue backed by byte[] */
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
/**
* Packs integers into 3 bytes (24 bits per value).
* @lucene.internal
*/
final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
final byte[] blocks;
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
private final byte[] blocks;
Packed8ThreeBlocks(int valueCount) {
super(valueCount, 24);
if (valueCount > MAX_SIZE) {
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
}
this.blocks = new byte[3 * valueCount];
blocks = new byte[valueCount * 3];
}
Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < blocks.length; i++) {
for (int i = 0; i < 3 * valueCount; ++i) {
blocks[i] = in.readByte();
}
final int mod = blocks.length % 8;
if (mod != 0) {
final int pad = 8 - mod;
// round out long
for (int i = 0; i < pad; i++) {
in.readByte();
for (int i = mod; i < 8; ++i) {
in.readByte();
}
}
}
@ -56,26 +58,55 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
@Override
public long get(int index) {
final int o = index * 3;
return (blocks[o] & 0xffL) << 16 | (blocks[o+1] & 0xffL) << 8 | (blocks[o+2] & 0xffL);
return (blocks[o] & 0xFFL) << 16 | (blocks[o+1] & 0xFFL) << 8 | (blocks[o+2] & 0xFFL);
}
/**
 * Bulk read: copies up to <code>len</code> values starting at
 * <code>index</code> into <code>arr</code>, starting at <code>off</code>.
 * Each value is rebuilt from three consecutive bytes, most significant first.
 *
 * @return the number of values copied, which is the smaller of
 *         <code>len</code> and <code>valueCount - index</code>
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(valueCount - index, len);
  int src = index * 3;
  for (int dst = off, dstEnd = off + count; dst < dstEnd; ++dst) {
    final long high = blocks[src++] & 0xFFL;
    final long mid = blocks[src++] & 0xFFL;
    final long low = blocks[src++] & 0xFFL;
    arr[dst] = high << 16 | mid << 8 | low;
  }
  return count;
}
@Override
public void set(int index, long value) {
final int o = index * 3;
blocks[o] = (byte) (value >>> 16);
blocks[o+1] = (byte) (value >>> 8);
blocks[o+2] = (byte) value;
blocks[o+1] = (byte) (value >> 8);
blocks[o] = (byte) (value >> 16);
}
/**
 * Bulk write: stores up to <code>len</code> values taken from
 * <code>arr</code>, starting at <code>off</code>, into this array starting at
 * <code>index</code>. Only the low 24 bits of each value are kept, written as
 * three consecutive bytes, most significant first.
 *
 * @return the number of values stored, which is the smaller of
 *         <code>len</code> and <code>valueCount - index</code>
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(valueCount - index, len);
  for (int k = 0; k < count; ++k) {
    final long v = arr[off + k];
    final int base = (index + k) * 3;
    blocks[base] = (byte) (v >>> 16);
    blocks[base + 1] = (byte) (v >>> 8);
    blocks[base + 2] = (byte) v;
  }
  return count;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
byte block1 = (byte) (val >> 16);
byte block2 = (byte) (val >> 8);
byte block3 = (byte) val;
for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) {
blocks[i++] = block1;
blocks[i++] = block2;
blocks[i++] = block3;
final byte block1 = (byte) (val >>> 16);
final byte block2 = (byte) (val >>> 8);
final byte block3 = (byte) val;
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
blocks[i] = block1;
blocks[i+1] = block2;
blocks[i+2] = block3;
}
}
@ -93,5 +124,4 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
}
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.LongsRef;
import java.io.IOException;
@ -34,7 +35,6 @@ import java.io.IOException;
*
* @lucene.internal
*/
public class PackedInts {
/**
@ -62,12 +62,184 @@ public class PackedInts {
*/
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
final static String CODEC_NAME = "PackedInts";
final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START;
public final static String CODEC_NAME = "PackedInts";
public final static int VERSION_START = 0;
public final static int VERSION_CURRENT = VERSION_START;
static final int PACKED = 0;
static final int PACKED_SINGLE_BLOCK = 1;
/**
 * A format to write packed ints.
 *
 * @lucene.internal
 */
public enum Format {
  /**
   * Compact format, all bits are written contiguously.
   */
  PACKED(0) {
    @Override
    public int nblocks(int bitsPerValue, int values) {
      return (int) Math.ceil((double) values * bitsPerValue / 64);
    }
  },

  /**
   * A format that may insert padding bits to improve encoding and decoding
   * speed. Since this format doesn't support all possible bits per value, you
   * should never use it directly, but rather use
   * {@link PackedInts#fastestFormatAndBits(int, int, float)} to find the
   * format that best suits your needs.
   */
  PACKED_SINGLE_BLOCK(1) {
    @Override
    public int nblocks(int bitsPerValue, int values) {
      // A value never straddles two blocks, so each block holds a whole number of values.
      final int valuesPerBlock = 64 / bitsPerValue;
      return (int) Math.ceil((double) values / valuesPerBlock);
    }

    @Override
    public boolean isSupported(int bitsPerValue) {
      return Packed64SingleBlock.isSupported(bitsPerValue);
    }

    @Override
    public float overheadPerValue(int bitsPerValue) {
      assert isSupported(bitsPerValue);
      final int valuesPerBlock = 64 / bitsPerValue;
      // Bits left unused at the end of every 64-bit block, spread over its values.
      final int overhead = 64 % bitsPerValue;
      return (float) overhead / valuesPerBlock;
    }
  };

  /**
   * Get a format according to its ID.
   *
   * @param id the ID, as returned by {@link #getId()}
   * @return the format whose ID is <code>id</code>
   * @throws IllegalArgumentException if no format has this ID
   */
  public static Format byId(int id) {
    for (Format format : Format.values()) {
      if (format.getId() == id) {
        return format;
      }
    }
    throw new IllegalArgumentException("Unknown format id: " + id);
  }

  /**
   * The ID of the format. Declared <code>final</code>: enum constants are
   * shared, so the ID that {@link #byId(int)} matches against must not be
   * reassignable by callers.
   */
  public final int id;

  private Format(int id) {
    this.id = id;
  }

  /**
   * Returns the ID of the format.
   */
  public int getId() {
    return id;
  }

  /**
   * Computes how many blocks are needed to store <code>values</code> values
   * of size <code>bitsPerValue</code>.
   */
  public abstract int nblocks(int bitsPerValue, int values);

  /**
   * Tests whether the provided number of bits per value is supported by the
   * format.
   */
  public boolean isSupported(int bitsPerValue) {
    return bitsPerValue >= 1 && bitsPerValue <= 64;
  }

  /**
   * Returns the overhead per value, in bits.
   */
  public float overheadPerValue(int bitsPerValue) {
    assert isSupported(bitsPerValue);
    return 0f;
  }

  /**
   * Returns the overhead ratio (<code>overhead per value / bits per value</code>).
   */
  public final float overheadRatio(int bitsPerValue) {
    assert isSupported(bitsPerValue);
    return overheadPerValue(bitsPerValue) / bitsPerValue;
  }
}
/**
 * Immutable pair of a {@link Format} and a number of bits per value.
 */
public static class FormatAndBits {

  /** The format. */
  public final Format format;

  /** The number of bits per value. */
  public final int bitsPerValue;

  /**
   * Stores both arguments as given, without any validation.
   */
  public FormatAndBits(Format format, int bitsPerValue) {
    this.bitsPerValue = bitsPerValue;
    this.format = format;
  }
}
/**
 * Try to find the {@link Format} and number of bits per value that would
 * restore from disk the fastest reader whose overhead is less than
 * <code>acceptableOverheadRatio</code>.
 * <p>
 * The <code>acceptableOverheadRatio</code> parameter makes sense for
 * random-access {@link Reader}s. In case you only plan to perform
 * sequential access on this stream later on, you should probably use
 * {@link PackedInts#COMPACT}.
 * <p>
 * If you don't know how many values you are going to write, use
 * <code>valueCount = -1</code>.
 * <p>
 * Candidates are tried in this order: {@link Format#PACKED} with 8, 16, 32
 * or 64 bits, then with 24 or 48 bits (only when <code>valueCount</code>
 * does not exceed the matching <code>MAX_SIZE</code>), then
 * {@link Format#PACKED_SINGLE_BLOCK} with the smallest supported width whose
 * total overhead stays within budget, and finally {@link Format#PACKED} with
 * the requested <code>bitsPerValue</code>.
 *
 * @param valueCount the number of values, or <code>-1</code> if unknown
 * @param bitsPerValue the minimum number of bits needed per value
 * @param acceptableOverheadRatio the acceptable overhead ratio, clamped to
 *        the [{@link PackedInts#COMPACT}, {@link PackedInts#FASTEST}] range
 * @return the selected format and number of bits per value
 */
public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
  // An unknown count is handled as the largest possible one.
  final int count = valueCount == -1 ? Integer.MAX_VALUE : valueCount;

  final float ratio = Math.min(FASTEST, Math.max(COMPACT, acceptableOverheadRatio));
  final float overheadBudget = ratio * bitsPerValue; // in bits
  final int maxBitsPerValue = bitsPerValue + (int) overheadBudget;

  if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
    return new FormatAndBits(Format.PACKED, 8);
  }
  if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
    return new FormatAndBits(Format.PACKED, 16);
  }
  if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
    return new FormatAndBits(Format.PACKED, 32);
  }
  if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
    return new FormatAndBits(Format.PACKED, 64);
  }
  if (count <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
    return new FormatAndBits(Format.PACKED, 24);
  }
  if (count <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
    return new FormatAndBits(Format.PACKED, 48);
  }

  for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
    if (!Format.PACKED_SINGLE_BLOCK.isSupported(bpv)) {
      continue;
    }
    // Widening a value from bitsPerValue to bpv already consumes part of the budget.
    final float remainingBudget = overheadBudget + bitsPerValue - bpv;
    final float paddingOverhead = Format.PACKED_SINGLE_BLOCK.overheadPerValue(bpv);
    if (paddingOverhead <= remainingBudget) {
      return new FormatAndBits(Format.PACKED_SINGLE_BLOCK, bpv);
    }
  }

  // Nothing faster fits the budget: fall back to the exact width, packed contiguously.
  return new FormatAndBits(Format.PACKED, bitsPerValue);
}
/**
* A read-only random access array of positive integers.
@ -132,30 +304,39 @@ public class PackedInts {
public static interface ReaderIterator extends Closeable {
/** Returns next value */
long next() throws IOException;
/** Returns at least 1 and at most <code>count</code> next values,
* the returned ref MUST NOT be modified */
LongsRef next(int count) throws IOException;
/** Returns number of bits per value */
int getBitsPerValue();
/** Returns number of values */
int size();
/** Returns the current position */
int ord();
/** Skips to the given ordinal and returns its value.
* @return the value at the given position
* @throws IOException if reading the value throws an IOException*/
long advance(int ord) throws IOException;
}
static abstract class ReaderIteratorImpl implements ReaderIterator {
protected final IndexInput in;
protected final DataInput in;
protected final int bitsPerValue;
protected final int valueCount;
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) {
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, DataInput in) {
this.in = in;
this.bitsPerValue = bitsPerValue;
this.valueCount = valueCount;
}
@Override
public long next() throws IOException {
LongsRef nextValues = next(1);
assert nextValues.length > 0;
final long result = nextValues.longs[nextValues.offset];
++nextValues.offset;
--nextValues.length;
return result;
}
@Override
public int getBitsPerValue() {
return bitsPerValue;
@ -168,7 +349,9 @@ public class PackedInts {
@Override
public void close() throws IOException {
in.close();
if (in instanceof Closeable) {
((Closeable) in).close();
}
}
}
@ -217,7 +400,7 @@ public class PackedInts {
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
* @lucene.internal
*/
public static abstract class ReaderImpl implements Reader {
static abstract class ReaderImpl implements Reader {
protected final int bitsPerValue;
protected final int valueCount;
@ -257,7 +440,7 @@ public class PackedInts {
}
public static abstract class MutableImpl extends ReaderImpl implements Mutable {
static abstract class MutableImpl extends ReaderImpl implements Mutable {
protected MutableImpl(int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue);
@ -283,13 +466,15 @@ public class PackedInts {
}
}
protected int getFormat() {
return PACKED;
protected Format getFormat() {
return Format.PACKED;
}
@Override
public void save(DataOutput out) throws IOException {
Writer writer = getWriterByFormat(out, valueCount, bitsPerValue, getFormat());
Writer writer = getWriterNoHeader(out, getFormat(),
valueCount, bitsPerValue, DEFAULT_BUFFER_SIZE);
writer.writeHeader();
for (int i = 0; i < valueCount; ++i) {
writer.add(get(i));
}
@ -302,121 +487,209 @@ public class PackedInts {
*/
public static abstract class Writer {
protected final DataOutput out;
protected final int bitsPerValue;
protected final int valueCount;
protected final int bitsPerValue;
protected Writer(DataOutput out, int valueCount, int bitsPerValue)
throws IOException {
assert bitsPerValue <= 64;
assert valueCount >= 0 || valueCount == -1;
this.out = out;
this.valueCount = valueCount;
this.bitsPerValue = bitsPerValue;
}
void writeHeader() throws IOException {
assert valueCount != -1;
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(bitsPerValue);
out.writeVInt(valueCount);
out.writeVInt(getFormat());
out.writeVInt(getFormat().getId());
}
protected abstract int getFormat();
/** The format used to serialize values. */
protected abstract PackedInts.Format getFormat();
/** Add a value to the stream. */
public abstract void add(long v) throws IOException;
/** The number of bits per value. */
public final int bitsPerValue() {
return bitsPerValue;
}
/** Perform end-of-stream operations. */
public abstract void finish() throws IOException;
/**
* Returns the current ord in the stream (number of values that have been
* written so far minus one).
*/
public abstract int ord();
}
/**
* Retrieve PackedInt data from the DataInput and return a packed int
* structure based on it.
* Expert: Restore a {@link Reader} from a stream without reading metadata at
* the beginning of the stream. This method is useful to restore data from
* streams which have been created using
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
*
* @param in positioned at the beginning of a stored packed int structure.
* @return a read only random access capable array of positive integers.
* @throws IOException if the structure could not be retrieved.
* @param in the stream to read data from, positioned at the beginning of the packed values
* @param format the format used to serialize
* @param version the version used to serialize the data
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @return a Reader
* @throws IOException
* @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
* @lucene.internal
*/
public static Reader getReader(DataInput in) throws IOException {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
public static Reader getReaderNoHeader(DataInput in, Format format, int version,
int valueCount, int bitsPerValue) throws IOException {
switch (format) {
case PACKED_SINGLE_BLOCK:
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
case PACKED:
switch (bitsPerValue) {
case 8:
return new Direct8(in, valueCount);
case 16:
return new Direct16(in, valueCount);
case 24:
return new Packed8ThreeBlocks(in, valueCount);
case 32:
return new Direct32(in, valueCount);
case 48:
return new Packed16ThreeBlocks(in, valueCount);
case 64:
return new Direct64(in, valueCount);
default:
return new Packed64(in, valueCount, bitsPerValue);
case 24:
if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
return new Packed8ThreeBlocks(in, valueCount);
}
break;
case 48:
if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
return new Packed16ThreeBlocks(in, valueCount);
}
break;
}
case PACKED_SINGLE_BLOCK:
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
return new Packed64(in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknwown Writer format: " + format);
}
}
/**
 * Restore a {@link Reader} from a stream that starts with a packed ints
 * header. The header is checked, then the number of bits per value, the
 * value count and the format ID are read, in that order, before delegating
 * to {@link #getReaderNoHeader(DataInput, Format, int, int, int)}.
 *
 * @param in the stream to read data from
 * @return a Reader
 * @throws IOException if the header or the values cannot be read
 * @lucene.internal
 */
public static Reader getReader(DataInput in) throws IOException {
  final int headerVersion = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);

  final int bpv = in.readVInt();
  assert bpv > 0 && bpv <= 64: "bitsPerValue=" + bpv;

  final int count = in.readVInt();
  final int formatId = in.readVInt();

  return getReaderNoHeader(in, Format.byId(formatId), headerVersion, count, bpv);
}
/**
* Expert: Restore a {@link ReaderIterator} from a stream without reading
* metadata at the beginning of the stream. This method is useful to restore
* data from streams which have been created using
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
* <p>
* The metadata which is not read from the stream (<code>format</code>,
* <code>valueCount</code> and <code>bitsPerValue</code>) must be supplied by
* the caller.
*
* @param in the stream to read data from, positioned at the beginning of the packed values
* @param format the format used to serialize
* @param version the version used to serialize the data (accepted for API
*        symmetry; it is not forwarded to the iterator created here)
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
* @return a ReaderIterator
* @throws IOException declared by the signature; this method itself only
*         constructs the iterator
* @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
* @lucene.internal
*/
public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version,
int valueCount, int bitsPerValue, int mem) throws IOException {
// Note the argument order: the iterator's constructor takes the format first and the stream fourth.
return new PackedReaderIterator(format, valueCount, bitsPerValue, in, mem);
}
/**
* Retrieve PackedInts as a {@link ReaderIterator}
* @param in positioned at the beginning of a stored packed int structure.
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
* @return an iterator to access the values
* @throws IOException if the structure could not be retrieved.
* @lucene.internal
*/
public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
switch (format) {
case PACKED:
return new PackedReaderIterator(valueCount, bitsPerValue, in);
case PACKED_SINGLE_BLOCK:
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
default:
throw new AssertionError("Unknwown Writer format: " + format);
}
final Format format = Format.byId(in.readVInt());
return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem);
}
/**
* Retrieve PackedInts.Reader that does not load values
* into RAM but rather accesses all values via the
* provided IndexInput.
* @param in positioned at the beginning of a stored packed int structure.
* @return an Reader to access the values
* @throws IOException if the structure could not be retrieved.
* Expert: Construct a direct {@link Reader} from a stream without reading
* metadata at the beginning of the stream. This method is useful to restore
* data from streams which have been created using
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
* </p><p>
* The returned reader will have very little memory overhead, but every call
* to {@link Reader#get(int)} is likely to perform a disk seek.
*
* @param in the stream to read data from
* @param format the format used to serialize
* @param version the version used to serialize the data
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @return a direct Reader
* @throws IOException
* @lucene.internal
*/
public static Reader getDirectReader(IndexInput in) throws IOException {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
public static Reader getDirectReaderNoHeader(IndexInput in, Format format,
int version, int valueCount, int bitsPerValue) throws IOException {
switch (format) {
case PACKED:
return new DirectPackedReader(bitsPerValue, valueCount, in);
case PACKED_SINGLE_BLOCK:
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default:
throw new AssertionError("Unknwown Writer format: " + format);
throw new AssertionError("Unknwown format: " + format);
}
}
/**
 * Construct a direct {@link Reader} from an {@link IndexInput}. This method
 * is useful to restore data from streams which have been created using
 * {@link PackedInts#getWriter(DataOutput, int, int, float)}.
 * <p>
 * The returned reader will have very little memory overhead, but every call
 * to {@link Reader#get(int)} is likely to perform a disk seek.
 * <p>
 * The header is checked, then the number of bits per value, the value count
 * and the format ID are read, in that order, before delegating to
 * {@link #getDirectReaderNoHeader(IndexInput, Format, int, int, int)}.
 *
 * @param in the stream to read data from
 * @return a direct Reader
 * @throws IOException if the header cannot be read
 * @lucene.internal
 */
public static Reader getDirectReader(IndexInput in) throws IOException {
  final int headerVersion = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);

  final int bpv = in.readVInt();
  assert bpv > 0 && bpv <= 64: "bitsPerValue=" + bpv;

  final int count = in.readVInt();
  final int formatId = in.readVInt();

  return getDirectReaderNoHeader(in, Format.byId(formatId), headerVersion, count, bpv);
}
/**
* Create a packed integer array with the given amount of values initialized
* to 0. the valueCount and the bitsPerValue cannot be changed after creation.
* All Mutables known by this factory are kept fully in RAM.
*
* </p><p>
* Positive values of <code>acceptableOverheadRatio</code> will trade space
* for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of
@ -433,103 +706,130 @@ public class PackedInts {
*/
public static Mutable getMutable(int valueCount,
int bitsPerValue, float acceptableOverheadRatio) {
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
assert valueCount >= 0;
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
return new Direct8(valueCount);
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
return new Direct16(valueCount);
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
return new Direct32(valueCount);
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
return new Direct64(valueCount);
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
return new Packed8ThreeBlocks(valueCount);
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
return new Packed16ThreeBlocks(valueCount);
} else {
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
if (overhead <= acceptableOverhead) {
return Packed64SingleBlock.create(valueCount, bpv);
}
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
switch (formatAndBits.format) {
case PACKED_SINGLE_BLOCK:
return Packed64SingleBlock.create(valueCount, formatAndBits.bitsPerValue);
case PACKED:
switch (formatAndBits.bitsPerValue) {
case 8:
return new Direct8(valueCount);
case 16:
return new Direct16(valueCount);
case 32:
return new Direct32(valueCount);
case 64:
return new Direct64(valueCount);
case 24:
if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
return new Packed8ThreeBlocks(valueCount);
}
break;
case 48:
if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
return new Packed16ThreeBlocks(valueCount);
}
break;
}
}
return new Packed64(valueCount, bitsPerValue);
return new Packed64(valueCount, formatAndBits.bitsPerValue);
default:
throw new AssertionError();
}
}
/**
* Create a packed integer array writer for the given number of values at the
* given bits/value. Writers append to the given IndexOutput and has very
* low memory overhead.
* Expert: Create a packed integer array writer for the given output, format,
* value count, and number of bits per value.
* </p><p>
* The resulting stream will be long-aligned. This means that depending on
* the format which is used, up to 63 bits will be wasted. An easy way to
* make sure that no space is lost is to always use a <code>valueCount</code>
* that is a multiple of 64.
* </p><p>
* This method does not write any metadata to the stream, meaning that it is
* your responsibility to store it somewhere else in order to be able to
* recover data from the stream later on:
* <ul>
* <li><code>format</code> (using {@link Format#getId()}),</li>
* <li><code>valueCount</code>,</li>
* <li><code>bitsPerValue</code>,</li>
* <li>{@link #VERSION_CURRENT}.</li>
* </ul>
* </p><p>
* It is possible to start writing values without knowing how many of them you
* are actually going to write. To do this, just pass <code>-1</code> as
* <code>valueCount</code>. On the other hand, for any positive value of
* <code>valueCount</code>, the returned writer will make sure that you don't
* write more values than expected and pad the end of stream with zeros in
* case you have written less than <code>valueCount</code> when calling
* {@link Writer#finish()}.
* </p><p>
* The <code>mem</code> parameter lets you control how much memory can be used
* to buffer changes in memory before flushing to disk. High values of
* <code>mem</code> are likely to improve throughput. On the other hand, if
* speed is not that important to you, a value of <code>0</code> will use as
* little memory as possible and should already offer reasonable throughput.
*
* Positive values of <code>acceptableOverheadRatio</code> will trade space
* @param out the data output
* @param format the format to use to serialize the values
* @param valueCount the number of values
* @param bitsPerValue the number of bits per value
* @param mem how much memory (in bytes) can be used to speed up serialization
* @return a Writer
* @throws IOException
* @see PackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int)
* @see PackedInts#getReaderNoHeader(DataInput, Format, int, int, int)
* @lucene.internal
*/
public static Writer getWriterNoHeader(
DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) throws IOException {
return new PackedWriter(format, out, valueCount, bitsPerValue, mem);
}
/**
* Create a packed integer array writer for the given output, format, value
* count, and number of bits per value.
* </p><p>
* The resulting stream will be long-aligned. This means that depending on
* the format which is used under the hoods, up to 63 bits will be wasted.
* An easy way to make sure that no space is lost is to always use a
* <code>valueCount</code> that is a multiple of 64.
* </p><p>
* This method writes metadata to the stream, so that the resulting stream is
* sufficient to restore a {@link Reader} from it. You don't need to track
* <code>valueCount</code> or <code>bitsPerValue</code> by yourself. In case
* this is a problem, you should probably look at
* {@link #getWriterNoHeader(DataOutput, Format, int, int, int)}.
* </p><p>
* The <code>acceptableOverheadRatio</code> parameter controls how
* readers that will be restored from this stream trade space
* for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
* that the fastest implementation is selected.
* that the fastest implementation is selected. In case you are only interested
* in reading this stream sequentially later on, you should probably use
* {@link PackedInts#COMPACT}.
*
* @param out the destination for the produced bits.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* @param out the data output
* @param valueCount the number of values
* @param bitsPerValue the number of bits per value
* @param acceptableOverheadRatio an acceptable overhead ratio per value
* @return a Writer ready for receiving values.
* @throws IOException if bits could not be written to out.
* @return a Writer
* @throws IOException
* @lucene.internal
*/
public static Writer getWriter(DataOutput out,
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
throws IOException {
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
assert valueCount >= 0;
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
return getWriterByFormat(out, valueCount, 8, PACKED);
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
return getWriterByFormat(out, valueCount, 16, PACKED);
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
return getWriterByFormat(out, valueCount, 32, PACKED);
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
return getWriterByFormat(out, valueCount, 64, PACKED);
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
return getWriterByFormat(out, valueCount, 24, PACKED);
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
return getWriterByFormat(out, valueCount, 48, PACKED);
} else {
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
if (overhead <= acceptableOverhead) {
return getWriterByFormat(out, valueCount, bpv, PACKED_SINGLE_BLOCK);
}
}
}
return getWriterByFormat(out, valueCount, bitsPerValue, PACKED);
}
}
private static Writer getWriterByFormat(DataOutput out,
int valueCount, int bitsPerValue, int format) throws IOException {
switch (format) {
case PACKED:
return new PackedWriter(out, valueCount, bitsPerValue);
case PACKED_SINGLE_BLOCK:
return new Packed64SingleBlockWriter(out, valueCount, bitsPerValue);
default:
throw new IllegalArgumentException("Unknown format " + format);
}
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
final Writer writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE);
writer.writeHeader();
return writer;
}
/** Returns how many bits are required to hold values up

View File

@ -17,76 +17,71 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
import org.apache.lucene.store.IndexInput;
import java.io.EOFException;
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.LongsRef;
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
private long pending;
private int pendingBitsLeft;
private int position = -1;
// masks[n-1] masks for bottom n bits
private final long[] masks;
final PackedInts.Format format;
final BulkOperation bulkOperation;
final long[] nextBlocks;
final LongsRef nextValues;
final int iterations;
int position;
public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in) {
PackedReaderIterator(PackedInts.Format format, int valueCount, int bitsPerValue, DataInput in, int mem) {
super(valueCount, bitsPerValue, in);
masks = new long[bitsPerValue];
long v = 1;
for (int i = 0; i < bitsPerValue; i++) {
v *= 2;
masks[i] = v - 1;
}
this.format = format;
bulkOperation = BulkOperation.of(format, bitsPerValue);
iterations = bulkOperation.computeIterations(valueCount, mem);
assert iterations > 0;
nextBlocks = new long[iterations * bulkOperation.blocks()];
nextValues = new LongsRef(new long[iterations * bulkOperation.values()], 0, 0);
assert iterations * bulkOperation.values() == nextValues.longs.length;
assert iterations * bulkOperation.blocks() == nextBlocks.length;
nextValues.offset = nextValues.longs.length;
position = -1;
}
public long next() throws IOException {
if (pendingBitsLeft == 0) {
pending = in.readLong();
pendingBitsLeft = 64;
}
@Override
public LongsRef next(int count) throws IOException {
assert nextValues.length >= 0;
assert count > 0;
assert nextValues.offset + nextValues.length <= nextValues.longs.length;
final long result;
if (pendingBitsLeft >= bitsPerValue) { // not split
result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
pendingBitsLeft -= bitsPerValue;
} else { // split
final int bits1 = bitsPerValue - pendingBitsLeft;
final long result1 = (pending & masks[pendingBitsLeft-1]) << bits1;
pending = in.readLong();
final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
result = result1 | result2;
nextValues.offset += nextValues.length;
final int remaining = valueCount - position - 1;
if (remaining <= 0) {
throw new EOFException();
}
++position;
return result;
count = Math.min(remaining, count);
if (nextValues.offset == nextValues.longs.length) {
final int remainingBlocks = format.nblocks(bitsPerValue, remaining);
final int blocksToRead = Math.min(remainingBlocks, nextBlocks.length);
for (int i = 0; i < blocksToRead; ++i) {
nextBlocks[i] = in.readLong();
}
for (int i = blocksToRead; i < nextBlocks.length; ++i) {
nextBlocks[i] = 0L;
}
bulkOperation.get(nextBlocks, 0, nextValues.longs, 0, iterations);
nextValues.offset = 0;
}
nextValues.length = Math.min(nextValues.longs.length - nextValues.offset, count);
position += nextValues.length;
return nextValues;
}
@Override
public int ord() {
return position;
}
public long advance(final int ord) throws IOException{
assert ord < valueCount : "ord must be less than valueCount";
assert ord > position : "ord must be greater than the current position";
final long bits = (long) bitsPerValue;
final int posToSkip = ord - 1 - position;
final long bitsToSkip = (bits * (long)posToSkip);
if (bitsToSkip < pendingBitsLeft) { // enough bits left - no seek required
pendingBitsLeft -= bitsToSkip;
} else {
final long skip = bitsToSkip-pendingBitsLeft;
final long closestByte = (skip >> 6) << 3;
if (closestByte != 0) { // need to seek
final long filePointer = in.getFilePointer();
in.seek(filePointer + closestByte);
}
pending = in.readLong();
pendingBitsLeft = 64 - (int)(skip % 64);
}
position = ord-1;
return next();
}
}

View File

@ -19,101 +19,79 @@ package org.apache.lucene.util.packed;
import org.apache.lucene.store.DataOutput;
import java.io.EOFException;
import java.io.IOException;
// Packs high order byte first, to match
// IndexOutput.writeInt/Long/Short byte order
/**
* Generic writer for space-optimal packed values. The resulting bits can be
* used directly by Packed32, Packed64 and PackedDirect* and will always be
* long-aligned.
*/
final class PackedWriter extends PackedInts.Writer {
class PackedWriter extends PackedInts.Writer {
private long pending;
private int pendingBitPos;
boolean finished;
final PackedInts.Format format;
final BulkOperation bulkOperation;
final long[] nextBlocks;
final long[] nextValues;
final int iterations;
int off;
int written;
// masks[n-1] masks for bottom n bits
private final long[] masks;
private int written = 0;
public PackedWriter(DataOutput out, int valueCount, int bitsPerValue)
throws IOException {
PackedWriter(PackedInts.Format format, DataOutput out, int valueCount, int bitsPerValue, int mem)
throws IOException {
super(out, valueCount, bitsPerValue);
pendingBitPos = 64;
masks = new long[bitsPerValue - 1];
long v = 1;
for (int i = 0; i < bitsPerValue - 1; i++) {
v *= 2;
masks[i] = v - 1;
}
this.format = format;
bulkOperation = BulkOperation.of(format, bitsPerValue);
iterations = bulkOperation.computeIterations(valueCount, mem);
nextBlocks = new long[iterations * bulkOperation.blocks()];
nextValues = new long[iterations * bulkOperation.values()];
off = 0;
written = 0;
finished = false;
}
@Override
protected int getFormat() {
return PackedInts.PACKED;
protected PackedInts.Format getFormat() {
return format;
}
/**
* Do not call this after finish
*/
@Override
public void add(long v) throws IOException {
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
assert v >= 0;
//System.out.println(" packedw add v=" + v + " pendingBitPos=" + pendingBitPos);
// TODO
if (pendingBitPos >= bitsPerValue) {
// not split
// write-once, so we can |= w/o first masking to 0s
pending |= v << (pendingBitPos - bitsPerValue);
if (pendingBitPos == bitsPerValue) {
// flush
out.writeLong(pending);
pending = 0;
pendingBitPos = 64;
} else {
pendingBitPos -= bitsPerValue;
}
} else {
// split
// write top pendingBitPos bits of value into bottom bits of pending
pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1];
//System.out.println(" part1 (v >> " + (bitsPerValue - pendingBitPos) + ") & " + masks[pendingBitPos-1]);
// flush
out.writeLong(pending);
// write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending
pendingBitPos = 64 - bitsPerValue + pendingBitPos;
//System.out.println(" part2 v << " + pendingBitPos);
pending = (v << pendingBitPos);
assert v >= 0 && v <= PackedInts.maxValue(bitsPerValue);
assert !finished;
if (valueCount != -1 && written >= valueCount) {
throw new EOFException("Writing past end of stream");
}
written++;
nextValues[off++] = v;
if (off == nextValues.length) {
flush(nextValues.length);
off = 0;
}
++written;
}
@Override
public void finish() throws IOException {
while (written < valueCount) {
add(0L); // Auto flush
assert !finished;
if (valueCount != -1) {
while (written < valueCount) {
add(0L);
}
}
flush(off);
finished = true;
}
if (pendingBitPos != 64) {
out.writeLong(pending);
/**
 * Encodes the buffered values and writes the blocks that hold the first
 * <code>nvalues</code> of them, then resets the buffer offset.
 *
 * @param nvalues number of valid entries at the start of <code>nextValues</code>
 * @throws IOException if writing to the underlying output fails
 */
private void flush(int nvalues) throws IOException {
  // The bulk encoder always packs the whole buffer. Entries past nvalues are
  // left over from an earlier fill, so zero them: otherwise they would end up
  // in the padding bits of the last block that gets written.
  for (int i = nvalues; i < nextValues.length; ++i) {
    nextValues[i] = 0L;
  }
  bulkOperation.set(nextBlocks, 0, nextValues, 0, iterations);
  final int blocks = format.nblocks(bitsPerValue, nvalues);
  for (int i = 0; i < blocks; ++i) {
    out.writeLong(nextBlocks[i]);
  }
  off = 0;
}
@Override
public String toString() {
return "PackedWriter(written " + written + "/" + valueCount + " with "
+ bitsPerValue + " bits/value)";
public int ord() {
return written - 1;
}
}

View File

@ -0,0 +1,264 @@
#! /usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from fractions import gcd
"""Code generation for bulk operations"""
# Bits-per-value settings for which a Packed64SingleBlock bulk operation is generated.
PACKED_64_SINGLE_BLOCK_BPV = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
# Name of the Java file written by this script (relative to the working directory).
OUTPUT_FILE = "BulkOperation.java"
# Fixed prologue of the generated file: licence, imports and the abstract
# BulkOperation base class. The generated computeIterations() caps the number
# of iterations using values() (it is compared against a *value* count) and
# never returns less than 1, so callers never allocate zero-length buffers.
HEADER = """// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.EnumMap;
/**
* Efficient sequential read/write of packed integers.
*/
abstract class BulkOperation {
static final EnumMap<PackedInts.Format, BulkOperation[]> BULK_OPERATIONS = new EnumMap<PackedInts.Format, BulkOperation[]>(PackedInts.Format.class);
public static BulkOperation of(PackedInts.Format format, int bitsPerValue) {
assert bitsPerValue > 0 && bitsPerValue <= 64;
BulkOperation[] ops = BULK_OPERATIONS.get(format);
if (ops == null || ops[bitsPerValue] == null) {
throw new IllegalArgumentException("format: " + format + ", bitsPerValue: " + bitsPerValue);
}
return ops[bitsPerValue];
}
/**
* For every number of bits per value, there is a minimum number of
* blocks (b) / values (v) you need to write in order to reach the next block
* boundary:
* - 16 bits per value -> b=1, v=4
* - 24 bits per value -> b=3, v=8
* - 50 bits per value -> b=25, v=32
* - 63 bits per value -> b=63, v = 64
* - ...
*
* A bulk read consists in copying <code>iterations*v</code> values that are
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
* (higher values of <code>iterations</code> are likely to yield a better
* throughput) => this requires n * (b + v) longs in memory.
*
* This method computes <code>iterations</code> as
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
*/
public final int computeIterations(int valueCount, int ramBudget) {
final int iterations = (ramBudget >>> 3) / (blocks() + values());
if (iterations == 0) {
// at least 1
return 1;
} else if ((iterations - 1) * values() >= valueCount) {
// don't allocate for more than the size of the reader, but never less than 1
return Math.max(1, (int) Math.ceil((double) valueCount / values()));
} else {
return iterations;
}
}
/**
* The minimum number of blocks required to perform a bulk get/set.
*/
public abstract int blocks();
/**
* The number of values that can be stored in <code>blocks()</code> blocks.
*/
public abstract int values();
/**
* Get <code>n * values()</code> values from <code>n * blocks()</code> blocks.
*/
public abstract void get(long[] blocks, int blockIndex, long[] values, int valuesIndex, int iterations);
/**
* Set <code>n * values()</code> values into <code>n * blocks()</code> blocks.
*/
public abstract void set(long[] blocks, int blockIndex, long[] values, int valuesIndex, int iterations);
"""
# Closes the BulkOperation class opened in HEADER.
FOOTER = "}"
def packed64singleblock(bpv, f):
  """Write the Java class ``Packed64SingleBlockBulkOperation<bpv>`` to ``f``.

  In the generated code every 64-bit block holds ``64 // bpv`` values; value
  ``i`` of a block occupies the bits starting at ``i * bpv`` (lowest bits first).

  bpv -- number of bits per value
  f   -- writable text stream receiving the Java source
  """
  # Floor division keeps this an int on both Python 2 and Python 3.
  values = 64 // bpv
  mask = (1 << bpv) - 1
  f.write("\n static final class Packed64SingleBlockBulkOperation%d extends BulkOperation {\n\n" %bpv)
  f.write(" public int blocks() {\n")
  f.write(" return 1;\n")
  f.write(" }\n\n")
  f.write(" public int values() {\n")
  f.write(" return %d;\n" %values)
  f.write(" }\n\n")

  # get(): unrolled extraction of every value of a block.
  f.write(" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  f.write(" final long block = blocks[bi++];\n")
  for i in range(values):
    if i == 0:
      # lowest value: no shift needed
      f.write(" values[vi++] = block & %dL;\n" %mask)
    elif i == values - 1:
      # highest value: the unsigned shift is emitted without a mask
      f.write(" values[vi++] = block >>> %d;\n" %(i * bpv))
    else:
      f.write(" values[vi++] = (block >>> %d) & %dL;\n" %(i * bpv, mask))
  f.write(" }\n")
  f.write(" }\n\n")

  # set(): one OR-expression per block, terminated after the last value.
  f.write(" public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  for i in range(values):
    if i == 0:
      f.write(" blocks[bi++] = values[vi++]")
    else:
      f.write(" | (values[vi++] << %d)" %(i * bpv))
    if i == values - 1:
      f.write(";\n")
  f.write(" }\n")
  f.write(" }\n")
  f.write(" }\n")
def packed64(bpv, f):
  """Write the Java class ``Packed64BulkOperation<bpv>`` to ``f``.

  The generated code packs values contiguously, most significant bits first,
  so a value may straddle two consecutive 64-bit blocks. ``blocks``/``values``
  are reduced to the smallest pair that ends on a block boundary.

  bpv -- number of bits per value (1..64)
  f   -- writable text stream receiving the Java source
  """
  blocks = bpv
  # Floor division keeps everything integral on both Python 2 and Python 3.
  values = blocks * 64 // bpv
  while blocks % 2 == 0 and values % 2 == 0:
    blocks //= 2
    values //= 2
  assert values * bpv == 64 * blocks, "%d values, %d blocks, %d bits per value" %(values, blocks, bpv)
  mask = (1 << bpv) - 1
  f.write(" static final class Packed64BulkOperation%d extends BulkOperation {\n\n" %bpv)
  f.write(" public int blocks() {\n")
  f.write(" return %d;\n" %blocks)
  f.write(" }\n\n")
  f.write(" public int values() {\n")
  f.write(" return %d;\n" %values)
  f.write(" }\n\n")

  if bpv == 64:
    # One value per block: plain copies. In set() the source offset belongs to
    # `values` (vi) and the destination offset to `blocks` (bi).
    f.write(""" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {
System.arraycopy(blocks, bi, values, vi, iterations);
}
public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {
System.arraycopy(values, vi, blocks, bi, iterations);
}
}
""")
    return

  # get(): unrolled decoding of one group of `values` values.
  f.write(" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  for i in range(0, values):
    block_offset = i * bpv // 64
    bit_offset = (i * bpv) % 64
    if bit_offset == 0:
      # start of block
      f.write(" final long block%d = blocks[bi++];\n" %block_offset)
      f.write(" values[vi++] = block%d >>> %d;\n" %(block_offset, 64 - bpv))
    elif bit_offset + bpv == 64:
      # end of block
      f.write(" values[vi++] = block%d & %dL;\n" %(block_offset, mask))
    elif bit_offset + bpv < 64:
      # middle of block
      f.write(" values[vi++] = (block%d >>> %d) & %dL;\n" %(block_offset, 64 - bit_offset - bpv, mask))
    else:
      # value spans across 2 blocks
      mask1 = (1 << (64 - bit_offset)) - 1
      shift1 = bit_offset + bpv - 64
      shift2 = 64 - shift1
      f.write(" final long block%d = blocks[bi++];\n" %(block_offset + 1))
      f.write(" values[vi++] = ((block%d & %dL) << %d) | (block%d >>> %d);\n" %(block_offset, mask1, shift1, block_offset + 1, shift2))
  f.write(" }\n")
  f.write(" }\n\n")

  # set(): unrolled encoding; each block is built as a single OR-expression.
  f.write(" public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  for i in range(0, values):
    bit_offset = (i * bpv) % 64
    if bit_offset == 0:
      # start of block
      f.write(" blocks[bi++] = (values[vi++] << %d)" %(64 - bpv))
    elif bit_offset + bpv == 64:
      # end of block
      f.write(" | values[vi++];\n")
    elif bit_offset + bpv < 64:
      # inside a block
      f.write(" | (values[vi++] << %d)" %(64 - bit_offset - bpv))
    else:
      # value spans across 2 blocks: high bits finish the current block,
      # low bits open the next one
      right_bits = bit_offset + bpv - 64
      f.write(" | (values[vi] >>> %d);\n" %right_bits)
      f.write(" blocks[bi++] = (values[vi++] << %d)" %(64 - right_bits))
  f.write(" }\n")
  f.write(" }\n")
  f.write(" }\n\n")
if __name__ == '__main__':
  # Generate BulkOperation.java: header, static registration table, one class
  # per supported (format, bitsPerValue) pair, footer. The `with` block closes
  # the file even if generation fails half-way.
  with open(OUTPUT_FILE, 'w') as f:
    f.write(HEADER)
    f.write(" static {\n")
    # Index 0 of each array stays unused so that ops[bitsPerValue] can be
    # addressed directly.
    f.write(" BULK_OPERATIONS.put(PackedInts.Format.PACKED, new BulkOperation[65]);\n")
    for bpv in range(1, 65):
      f.write(" BULK_OPERATIONS.get(PackedInts.Format.PACKED)[%d] = new Packed64BulkOperation%d();\n" %(bpv, bpv))
    f.write(" BULK_OPERATIONS.put(PackedInts.Format.PACKED_SINGLE_BLOCK, new BulkOperation[65]);\n")
    for bpv in PACKED_64_SINGLE_BLOCK_BPV:
      f.write(" BULK_OPERATIONS.get(PackedInts.Format.PACKED_SINGLE_BLOCK)[%d] = new Packed64SingleBlockBulkOperation%d();\n" %(bpv, bpv))
    f.write(" }\n")
    for bpv in range(1, 65):
      packed64(bpv, f)
    for bpv in PACKED_64_SINGLE_BLOCK_BPV:
      packed64singleblock(bpv,f)
    f.write(FOOTER)

View File

@ -0,0 +1,175 @@
#! /usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Fixed prologue written at the top of every generated Direct<N>.java file:
# "generated" marker, package declaration, licence and Java imports.
HEADER="""// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
"""
# Bits per value -> Java primitive type of the backing array. The keys also
# decide which Direct<N>.java files get generated.
TYPES = {8: "byte", 16: "short", 32: "int", 64: "long"}
# Bits per value -> Java expression suffix that widens a stored element to a
# non-negative long (empty for 64 bits, where no masking is applied).
MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
# Bits per value -> Java cast prefix used when narrowing a long to the backing type.
CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
if __name__ == '__main__':
  # Emit one Direct<bpv>.java per entry of TYPES. Sorting makes the generation
  # order deterministic; the `with` block closes each file even on error.
  for bpv in sorted(TYPES):
    with open("Direct%d.java" %bpv, 'w') as f:
      f.write(HEADER)
      f.write("""/**
* Direct wrapping of %d-bits values to a backing array.
* @lucene.internal
*/\n""" %bpv)
      f.write("final class Direct%d extends PackedInts.MutableImpl {\n" %bpv)
      f.write(" final %s[] values;\n\n" %TYPES[bpv])
      f.write(" Direct%d(int valueCount) {\n" %bpv)
      f.write(" super(valueCount, %d);\n" %bpv)
      f.write(" values = new %s[valueCount];\n" %TYPES[bpv])
      f.write(" }\n\n")

      # Deserializing constructor: reads the values, then (for types narrower
      # than a long) consumes the elements that pad the stream up to the next
      # 64-bit boundary.
      f.write(" Direct%d(DataInput in, int valueCount) throws IOException {\n" %bpv)
      f.write(" this(valueCount);\n")
      f.write(" for (int i = 0; i < valueCount; ++i) {\n")
      f.write(" values[i] = in.read%s();\n" %TYPES[bpv].title())
      f.write(" }\n")
      if bpv != 64:
        f.write(" final int mod = valueCount %% %d;\n" %(64 // bpv))
        f.write(" if (mod != 0) {\n")
        f.write(" for (int i = mod; i < %d; ++i) {\n" %(64 // bpv))
        f.write(" in.read%s();\n" %TYPES[bpv].title())
        f.write(" }\n")
        f.write(" }\n")
      f.write(" }\n")

      # Single-value accessors and bookkeeping shared by all widths.
      f.write("""
@Override
public long get(final int index) {
return values[index]%s;
}
public void set(final int index, final long value) {
values[index] = %s(value);
}
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(values);
}
public void clear() {
Arrays.fill(values, %s0L);
}
@Override
public Object getArray() {
return values;
}
@Override
public boolean hasArray() {
return true;
}
""" %(MASKS[bpv], CASTS[bpv], CASTS[bpv]))

      if bpv == 64:
        # long[] backing array: bulk operations are straight array copies.
        f.write("""
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
System.arraycopy(values, index, arr, off, gets);
return gets;
}
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
System.arraycopy(arr, off, values, index, sets);
return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
Arrays.fill(values, fromIndex, toIndex, val);
}
""")
      else:
        # Narrower backing arrays: bulk operations convert element by element.
        f.write("""
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
arr[o] = values[i]%s;
}
return gets;
}
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
values[i] = %sarr[o];
}
return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert val == (val%s);
Arrays.fill(values, fromIndex, toIndex, %sval);
}
""" %(MASKS[bpv], CASTS[bpv], MASKS[bpv], CASTS[bpv]))
      f.write("}\n")

View File

@ -58,12 +58,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
}
public static float overheadPerValue(int bitsPerValue) {
int valuesPerBlock = 64 / bitsPerValue;
int overhead = 64 %% bitsPerValue;
return (float) overhead / valuesPerBlock;
}
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
return valueCount / valuesPerBlock
+ (valueCount %% valuesPerBlock == 0 ? 0 : 1);
@ -111,16 +105,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk get
assert index %% valuesPerBlock == 0;
final long readMask = (1L << bitsPerValue) - 1;
final int startBlock = index / valuesPerBlock;
final int endBlock = (index + len) / valuesPerBlock;
final int diff = (endBlock - startBlock) * valuesPerBlock;
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert op.blocks() == 1;
assert op.values() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.get(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff;
for (int block = startBlock; block < endBlock; ++block) {
for (int i = 0; i < valuesPerBlock; ++i) {
arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
}
}
if (index > originalIndex) {
// stay at the block boundary
@ -157,17 +149,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk set
assert index %% valuesPerBlock == 0;
final int startBlock = index / valuesPerBlock;
final int endBlock = (index + len) / valuesPerBlock;
final int diff = (endBlock - startBlock) * valuesPerBlock;
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert op.blocks() == 1;
assert op.values() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.set(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff;
for (int block = startBlock; block < endBlock; ++block) {
long next = 0L;
for (int i = 0; i < valuesPerBlock; ++i) {
next |= (arr[off++] << (i * bitsPerValue));
}
blocks[block] = next;
}
if (index > originalIndex) {
// stay at the block boundary
@ -221,8 +210,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
}
@Override
protected int getFormat() {
return PackedInts.PACKED_SINGLE_BLOCK;
protected PackedInts.Format getFormat() {
return PackedInts.Format.PACKED_SINGLE_BLOCK;
}
@Override

View File

@ -0,0 +1,161 @@
#! /usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Fixed prologue written at the top of every generated Packed<N>ThreeBlocks.java
# file: "generated" marker, package declaration, licence and Java imports.
HEADER="""// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
"""
# Block width in bits -> Java primitive type of the backing array. Only the
# widths listed here are generated (three blocks per value: 24 and 48 bits).
TYPES = {8: "byte", 16: "short"}
# Block width -> Java expression suffix that widens a stored block to a
# non-negative long. The 32 and 64 entries are not reachable through TYPES.
MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
# Block width -> Java cast prefix used when narrowing a long to the backing type.
CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
if __name__ == '__main__':
  # Emit one Packed<bpv>ThreeBlocks.java per entry of TYPES; each value is
  # stored as three consecutive blocks of `bpv` bits. Sorting makes the
  # generation order deterministic; the `with` block closes each file even on
  # error.
  for bpv in sorted(TYPES):
    with open("Packed%dThreeBlocks.java" %bpv, 'w') as f:
      f.write(HEADER)
      f.write("""/**
* Packs integers into 3 %ss (%d bits per value).
* @lucene.internal
*/\n""" %(TYPES[bpv], bpv*3))
      f.write("final class Packed%dThreeBlocks extends PackedInts.MutableImpl {\n" %bpv)
      f.write(" final %s[] blocks;\n\n" %TYPES[bpv])
      f.write(" public static final int MAX_SIZE = Integer.MAX_VALUE / 3;\n\n")
      f.write(" Packed%dThreeBlocks(int valueCount) {\n" %bpv)
      f.write(" super(valueCount, %d);\n" %(bpv*3))
      f.write(" if (valueCount > MAX_SIZE) {\n")
      f.write(" throw new ArrayIndexOutOfBoundsException(\"MAX_SIZE exceeded\");\n")
      f.write(" }\n")
      f.write(" blocks = new %s[valueCount * 3];\n" %TYPES[bpv])
      f.write(" }\n\n")

      # Deserializing constructor: reads the blocks, then consumes the
      # elements that pad the stream up to the next 64-bit boundary.
      f.write(" Packed%dThreeBlocks(DataInput in, int valueCount) throws IOException {\n" %bpv)
      f.write(" this(valueCount);\n")
      f.write(" for (int i = 0; i < 3 * valueCount; ++i) {\n")
      f.write(" blocks[i] = in.read%s();\n" %TYPES[bpv].title())
      f.write(" }\n")
      f.write(" final int mod = blocks.length %% %d;\n" %(64 // bpv))
      f.write(" if (mod != 0) {\n")
      f.write(" for (int i = mod; i < %d; ++i) {\n" %(64 // bpv))
      f.write(" in.read%s();\n" %TYPES[bpv].title())
      f.write(" }\n")
      f.write(" }\n")
      f.write(" }\n")

      # Accessors. The argument tuple below follows the template top to
      # bottom: get, bulk get, set, bulk set, fill, clear.
      f.write("""
@Override
public long get(int index) {
final int o = index * 3;
return (blocks[o]%s) << %d | (blocks[o+1]%s) << %d | (blocks[o+2]%s);
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
arr[off++] = (blocks[i]%s) << %d | (blocks[i+1]%s) << %d | (blocks[i+2]%s);
}
return gets;
}
@Override
public void set(int index, long value) {
final int o = index * 3;
blocks[o] = %s(value >>> %d);
blocks[o+1] = %s(value >>> %d);
blocks[o+2] = %svalue;
}
@Override
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
final long value = arr[i];
blocks[o++] = %s(value >>> %d);
blocks[o++] = %s(value >>> %d);
blocks[o++] = %svalue;
}
return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
final %s block1 = %s(val >>> %d);
final %s block2 = %s(val >>> %d);
final %s block3 = %sval;
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
blocks[i] = block1;
blocks[i+1] = block2;
blocks[i+2] = block3;
}
}
@Override
public void clear() {
Arrays.fill(blocks, %s0);
}
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(blocks);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
}
}
""" %(MASKS[bpv], 2*bpv, MASKS[bpv], bpv, MASKS[bpv],
      MASKS[bpv], 2*bpv, MASKS[bpv], bpv, MASKS[bpv],
      CASTS[bpv], 2*bpv, CASTS[bpv], bpv, CASTS[bpv],
      CASTS[bpv], 2*bpv, CASTS[bpv], bpv, CASTS[bpv],
      TYPES[bpv], CASTS[bpv], 2*bpv,
      TYPES[bpv], CASTS[bpv], bpv,
      TYPES[bpv], CASTS[bpv],
      CASTS[bpv]))

View File

@ -19,9 +19,16 @@
<head></head>
<body bgcolor="white">
<p>Packed integer arrays and streams.</p>
<p>
The packed package provides random access capable arrays of positive longs.
The implementations provides different trade offs between memory usage and
The packed package provides
<ul>
<li>sequential and random access capable arrays of positive longs,</li>
<li>routines for efficient serialization and deserialization of streams of packed integers.</li>
</ul>
The implementations provide different trade-offs between memory usage and
access speed. The standard usage scenario is replacing large int or long
arrays in order to reduce the memory footprint.
</p><p>

View File

@ -24,6 +24,7 @@ import java.util.Random;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.*;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase.Slow;
@ -57,6 +58,9 @@ public class TestPackedInts extends LuceneTestCase {
for(int nbits=1;nbits<=64;nbits++) {
final long maxValue = PackedInts.maxValue(nbits);
final int valueCount = 100+random().nextInt(500);
final int bufferSize = random().nextBoolean()
? _TestUtil.nextInt(random(), 0, 48)
: _TestUtil.nextInt(random(), 0, 4096);
final Directory d = newDirectory();
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
@ -79,10 +83,10 @@ public class TestPackedInts extends LuceneTestCase {
// ensure that finish() added the (valueCount-actualValueCount) missing values
final long bytes;
switch (w.getFormat()) {
case PackedInts.PACKED:
case PACKED:
bytes = (long) Math.ceil((double) valueCount * w.bitsPerValue / 64) << 3;
break;
case PackedInts.PACKED_SINGLE_BLOCK:
case PACKED_SINGLE_BLOCK:
final int valuesPerBlock = 64 / w.bitsPerValue;
bytes = (long) Math.ceil((double) valueCount / valuesPerBlock) << 3;
break;
@ -97,7 +101,7 @@ public class TestPackedInts extends LuceneTestCase {
CodecUtil.checkHeader(in, PackedInts.CODEC_NAME, PackedInts.VERSION_START, PackedInts.VERSION_CURRENT); // codec header
assertEquals(w.bitsPerValue, in.readVInt());
assertEquals(valueCount, in.readVInt());
assertEquals(w.getFormat(), in.readVInt());
assertEquals(w.getFormat().getId(), in.readVInt());
assertEquals(startFp, in.getFilePointer());
in.close();
}
@ -113,37 +117,34 @@ public class TestPackedInts extends LuceneTestCase {
}
in.close();
}
{ // test reader iterator next
IndexInput in = d.openInput("out.bin", newIOContext(random()));
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in);
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
for(int i=0;i<valueCount;i++) {
assertEquals("index=" + i + " valueCount="
+ valueCount + " nbits=" + nbits + " for "
+ r.getClass().getSimpleName(), values[i], r.next());
assertEquals(i, r.ord());
}
assertEquals(fp, in.getFilePointer());
in.close();
}
{ // test reader iterator next vs. advance
{ // test reader iterator bulk next
IndexInput in = d.openInput("out.bin", newIOContext(random()));
PackedInts.ReaderIterator intsEnum = PackedInts.getReaderIterator(in);
for (int i = 0; i < valueCount; i +=
1 + ((valueCount - i) <= 20 ? random().nextInt(valueCount - i)
: random().nextInt(20))) {
final String msg = "index=" + i + " valueCount="
+ valueCount + " nbits=" + nbits + " for "
+ intsEnum.getClass().getSimpleName();
if (i - intsEnum.ord() == 1 && random().nextBoolean()) {
assertEquals(msg, values[i], intsEnum.next());
} else {
assertEquals(msg, values[i], intsEnum.advance(i));
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
int i = 0;
while (i < valueCount) {
final int count = _TestUtil.nextInt(random(), 1, 95);
final LongsRef next = r.next(count);
for (int k = 0; k < next.length; ++k) {
assertEquals("index=" + i + " valueCount="
+ valueCount + " nbits=" + nbits + " for "
+ r.getClass().getSimpleName(), values[i + k], next.longs[next.offset + k]);
}
assertEquals(msg, i, intsEnum.ord());
i += next.length;
}
if (intsEnum.ord() < valueCount - 1)
assertEquals(values[valueCount - 1], intsEnum
.advance(valueCount - 1));
assertEquals(valueCount - 1, intsEnum.ord());
assertEquals(fp, in.getFilePointer());
in.close();
}