LUCENE-4161: Make packed int arrays usable by codecs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1357159 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2012-07-04 08:17:02 +00:00
parent 9250082566
commit e96b143b6a
23 changed files with 9566 additions and 745 deletions

View File

@ -315,10 +315,10 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
try { try {
// Subsample the index terms // Subsample the index terms
clone1.seek(packedIndexStart); clone1.seek(packedIndexStart);
final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1); final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1, PackedInts.DEFAULT_BUFFER_SIZE);
clone2.seek(packedOffsetsStart); clone2.seek(packedOffsetsStart);
final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2); final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2, PackedInts.DEFAULT_BUFFER_SIZE);
// TODO: often we can get by w/ fewer bits per // TODO: often we can get by w/ fewer bits per
// value, below.. .but this'd be more complex: // value, below.. .but this'd be more complex:

View File

@ -121,7 +121,7 @@ class VarStraightBytesImpl {
final IndexInput cloneIdx = reader.cloneIndex(); final IndexInput cloneIdx = reader.cloneIndex();
try { try {
numDataBytes = cloneIdx.readVLong(); numDataBytes = cloneIdx.readVLong();
final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx); final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx, PackedInts.DEFAULT_BUFFER_SIZE);
for (int i = 0; i < maxDocs; i++) { for (int i = 0; i < maxDocs; i++) {
long offset = iter.next(); long offset = iter.next();
++lastDocID; ++lastDocID;

View File

@ -0,0 +1,167 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Represents long[], as a slice (offset + length) into an
 * existing long[].  The {@link #longs} member should never be null; use
 * {@link #EMPTY_LONGS} if necessary.
 *
 * @lucene.internal
 */
public final class LongsRef implements Comparable<LongsRef>, Cloneable {

  /** Shared zero-length array, used as the backing array of an empty reference. */
  public static final long[] EMPTY_LONGS = new long[0];

  /** Backing array; only the slots in {@code [offset, offset + length)} belong to this slice. */
  public long[] longs;

  /** Index in {@link #longs} of the first slot of the slice. */
  public int offset;

  /** Number of slots in the slice. */
  public int length;

  /** Creates an empty reference backed by {@link #EMPTY_LONGS}. */
  public LongsRef() {
    longs = EMPTY_LONGS;
  }

  /**
   * Creates a reference backed by a freshly allocated array of
   * <code>capacity</code> longs; offset and length are left at zero.
   */
  public LongsRef(int capacity) {
    longs = new long[capacity];
  }

  /**
   * Creates a reference over the given array. The array is used directly
   * (it is not copied), so later changes to it are visible through this
   * reference.
   */
  public LongsRef(long[] longs, int offset, int length) {
    assert longs != null;
    assert offset >= 0;
    assert length >= 0;
    assert longs.length >= offset + length;
    this.longs = longs;
    this.offset = offset;
    this.length = length;
  }

  /**
   * Returns a new reference with the same offset and length that shares
   * the same backing array. Use {@link #deepCopyOf} to get an
   * independent copy of the values.
   */
  @Override
  public LongsRef clone() {
    return new LongsRef(longs, offset, length);
  }

  /** Hashes the values of the slice only; offset and unused slots do not contribute. */
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 0;
    final int end = offset + length;
    for (int i = offset; i < end; i++) {
      // fold the high and the low 32 bits of each value together
      result = prime * result + (int) (longs[i] ^ (longs[i] >>> 32));
    }
    return result;
  }

  /** Two references are equal when their slices hold the same values in the same order. */
  @Override
  public boolean equals(Object other) {
    // instanceof is false for null, so no separate null check is needed
    return other instanceof LongsRef && longsEquals((LongsRef) other);
  }

  /**
   * Returns true if <code>other</code> has the same length as this
   * reference and holds the same values, slot by slot.
   */
  public boolean longsEquals(LongsRef other) {
    if (length != other.length) {
      return false;
    }
    final long[] otherLongs = other.longs;
    final int end = offset + length;
    for (int upto = offset, otherUpto = other.offset; upto < end; upto++, otherUpto++) {
      if (longs[upto] != otherLongs[otherUpto]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Signed long order comparison: values are compared slot by slot as
   * signed longs; if one slice is a prefix of the other, the shorter one
   * sorts first.
   */
  @Override
  public int compareTo(LongsRef other) {
    if (this == other) return 0;

    final long[] aLongs = this.longs;
    int aUpto = this.offset;
    final long[] bLongs = other.longs;
    int bUpto = other.offset;

    final int aStop = aUpto + Math.min(this.length, other.length);

    while (aUpto < aStop) {
      final long aLong = aLongs[aUpto++];
      final long bLong = bLongs[bUpto++];
      if (aLong > bLong) {
        return 1;
      } else if (aLong < bLong) {
        return -1;
      }
    }

    // One is a prefix of the other, or, they are equal:
    return this.length - other.length;
  }

  /**
   * Copies the values of <code>other</code> into this reference. The
   * current backing array is reused when it has enough room after
   * {@link #offset}; otherwise a new array of exactly
   * <code>other.length</code> longs is allocated and the offset is reset
   * to zero.
   */
  public void copyLongs(LongsRef other) {
    if (longs.length - offset < other.length) {
      longs = new long[other.length];
      offset = 0;
    }
    System.arraycopy(other.longs, other.offset, longs, offset, other.length);
    length = other.length;
  }

  /**
   * Used to grow the reference array.
   *
   * In general this should not be used as it does not take the offset into account.
   * @lucene.internal */
  public void grow(int newLength) {
    assert offset == 0;
    if (longs.length < newLength) {
      longs = ArrayUtil.grow(longs, newLength);
    }
  }

  /** Renders the slice as space-separated hexadecimal values inside square brackets. */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append('[');
    final int end = offset + length;
    for (int i = offset; i < end; i++) {
      if (i > offset) {
        sb.append(' ');
      }
      sb.append(Long.toHexString(longs[i]));
    }
    sb.append(']');
    return sb.toString();
  }

  /**
   * Creates a new LongsRef that points to a copy of the longs from
   * <code>other</code>
   * <p>
   * The returned LongsRef will have a length of other.length
   * and an offset of zero.
   */
  public static LongsRef deepCopyOf(LongsRef other) {
    LongsRef clone = new LongsRef();
    clone.copyLongs(other);
    return clone;
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,5 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed; package org.apache.lucene.util.packed;
/* /*
@ -24,62 +26,37 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
/** /**
* Direct wrapping of 16 bit values to a backing array of shorts. * Direct wrapping of 16-bits values to a backing array.
* @lucene.internal * @lucene.internal
*/ */
final class Direct16 extends PackedInts.MutableImpl {
final short[] values;
class Direct16 extends PackedInts.MutableImpl { Direct16(int valueCount) {
private final short[] values; super(valueCount, 16);
private static final int BITS_PER_VALUE = 16;
public Direct16(int valueCount) {
super(valueCount, BITS_PER_VALUE);
values = new short[valueCount]; values = new short[valueCount];
} }
public Direct16(DataInput in, int valueCount) throws IOException { Direct16(DataInput in, int valueCount) throws IOException {
super(valueCount, BITS_PER_VALUE); this(valueCount);
short[] values = new short[valueCount]; for (int i = 0; i < valueCount; ++i) {
for(int i=0;i<valueCount;i++) {
values[i] = in.readShort(); values[i] = in.readShort();
} }
final int mod = valueCount % 4; final int mod = valueCount % 4;
if (mod != 0) { if (mod != 0) {
final int pad = 4-mod; for (int i = mod; i < 4; ++i) {
// round out long
for(int i=0;i<pad;i++) {
in.readShort(); in.readShort();
} }
} }
this.values = values;
}
/**
* Creates an array backed by the given values.
* </p><p>
* Note: The values are used directly, so changes to the values will
* affect the structure.
* @param values used as the internal backing array.
*/
public Direct16(short[] values) {
super(values.length, BITS_PER_VALUE);
this.values = values;
}
public long get(final int index) {
assert index >= 0 && index < size();
return 0xFFFFL & values[index];
}
public void set(final int index, final long value) {
values[index] = (short)(value & 0xFFFF);
} }
@Override @Override
public void fill(int fromIndex, int toIndex, long val) { public long get(final int index) {
assert (val & 0xffffL) == val; return values[index] & 0xFFFFL;
Arrays.fill(values, fromIndex, toIndex, (short) val); }
public void set(final int index, final long value) {
values[index] = (short) (value);
} }
public long ramBytesUsed() { public long ramBytesUsed() {
@ -87,7 +64,7 @@ class Direct16 extends PackedInts.MutableImpl {
} }
public void clear() { public void clear() {
Arrays.fill(values, (short)0); Arrays.fill(values, (short) 0L);
} }
@Override @Override
@ -99,4 +76,35 @@ class Direct16 extends PackedInts.MutableImpl {
public boolean hasArray() { public boolean hasArray() {
return true; return true;
} }
/**
 * Bulk read: copies up to <code>len</code> consecutive values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>.
 *
 * @return the number of values copied, which is capped by the number of
 *         values available from <code>index</code> onwards
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  for (int k = 0; k < count; ++k) {
    // the mask undoes the sign extension of the stored short
    arr[off + k] = values[index + k] & 0xFFFFL;
  }
  return count;
}
/**
 * Bulk write: stores up to <code>len</code> values taken from
 * <code>arr</code>, starting at <code>off</code>, into consecutive slots
 * beginning at <code>index</code>. Each value is narrowed to a short.
 *
 * @return the number of values stored, which is capped by the number of
 *         slots available from <code>index</code> onwards
 */
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  for (int k = 0; k < count; ++k) {
    values[index + k] = (short) arr[off + k];
  }
  return count;
}
/**
 * Sets every slot from <code>fromIndex</code> (inclusive) to
 * <code>toIndex</code> (exclusive) to <code>val</code>, which must fit in
 * 16 bits (checked by assertion only).
 */
@Override
public void fill(int fromIndex, int toIndex, long val) {
  assert (val & 0xFFFFL) == val;
  final short narrowed = (short) val;
  Arrays.fill(values, fromIndex, toIndex, narrowed);
}
} }

View File

@ -1,3 +1,5 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed; package org.apache.lucene.util.packed;
/* /*
@ -24,58 +26,37 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
/** /**
* Direct wrapping of 32 bit values to a backing array of ints. * Direct wrapping of 32-bits values to a backing array.
* @lucene.internal * @lucene.internal
*/ */
final class Direct32 extends PackedInts.MutableImpl {
final int[] values;
class Direct32 extends PackedInts.MutableImpl { Direct32(int valueCount) {
private final int[] values; super(valueCount, 32);
private static final int BITS_PER_VALUE = 32;
public Direct32(int valueCount) {
super(valueCount, BITS_PER_VALUE);
values = new int[valueCount]; values = new int[valueCount];
} }
public Direct32(DataInput in, int valueCount) throws IOException { Direct32(DataInput in, int valueCount) throws IOException {
super(valueCount, BITS_PER_VALUE); this(valueCount);
int[] values = new int[valueCount]; for (int i = 0; i < valueCount; ++i) {
for(int i=0;i<valueCount;i++) {
values[i] = in.readInt(); values[i] = in.readInt();
} }
final int mod = valueCount % 2; final int mod = valueCount % 2;
if (mod != 0) { if (mod != 0) {
in.readInt(); for (int i = mod; i < 2; ++i) {
in.readInt();
}
} }
this.values = values;
}
/**
* Creates an array backed by the given values.
* </p><p>
* Note: The values are used directly, so changes to the given values will
* affect the structure.
* @param values used as the internal backing array.
*/
public Direct32(int[] values) {
super(values.length, BITS_PER_VALUE);
this.values = values;
}
public long get(final int index) {
assert index >= 0 && index < size();
return 0xFFFFFFFFL & values[index];
}
public void set(final int index, final long value) {
values[index] = (int)(value & 0xFFFFFFFF);
} }
@Override @Override
public void fill(int fromIndex, int toIndex, long val) { public long get(final int index) {
assert (val & 0xffffffffL) == val; return values[index] & 0xFFFFFFFFL;
Arrays.fill(values, fromIndex, toIndex, (int) val); }
public void set(final int index, final long value) {
values[index] = (int) (value);
} }
public long ramBytesUsed() { public long ramBytesUsed() {
@ -83,11 +64,11 @@ class Direct32 extends PackedInts.MutableImpl {
} }
public void clear() { public void clear() {
Arrays.fill(values, 0); Arrays.fill(values, (int) 0L);
} }
@Override @Override
public int[] getArray() { public Object getArray() {
return values; return values;
} }
@ -95,4 +76,35 @@ class Direct32 extends PackedInts.MutableImpl {
public boolean hasArray() { public boolean hasArray() {
return true; return true;
} }
/**
 * Bulk read: copies up to <code>len</code> consecutive values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>.
 *
 * @return the number of values copied, which is capped by the number of
 *         values available from <code>index</code> onwards
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  for (int k = 0; k < count; ++k) {
    // the mask undoes the sign extension of the stored int
    arr[off + k] = values[index + k] & 0xFFFFFFFFL;
  }
  return count;
}
/**
 * Bulk write: stores up to <code>len</code> values taken from
 * <code>arr</code>, starting at <code>off</code>, into consecutive slots
 * beginning at <code>index</code>. Each value is narrowed to an int.
 *
 * @return the number of values stored, which is capped by the number of
 *         slots available from <code>index</code> onwards
 */
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  for (int k = 0; k < count; ++k) {
    values[index + k] = (int) arr[off + k];
  }
  return count;
}
/**
 * Sets every slot from <code>fromIndex</code> (inclusive) to
 * <code>toIndex</code> (exclusive) to <code>val</code>, which must fit in
 * 32 bits (checked by assertion only).
 */
@Override
public void fill(int fromIndex, int toIndex, long val) {
  assert (val & 0xFFFFFFFFL) == val;
  final int narrowed = (int) val;
  Arrays.fill(values, fromIndex, toIndex, narrowed);
}
} }

View File

@ -1,3 +1,5 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed; package org.apache.lucene.util.packed;
/* /*
@ -24,70 +26,31 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
/** /**
* Direct wrapping of 64 bit values to a backing array of longs. * Direct wrapping of 64-bits values to a backing array.
* @lucene.internal * @lucene.internal
*/ */
class Direct64 extends PackedInts.MutableImpl { final class Direct64 extends PackedInts.MutableImpl {
private final long[] values; final long[] values;
private static final int BITS_PER_VALUE = 64;
public Direct64(int valueCount) { Direct64(int valueCount) {
super(valueCount, BITS_PER_VALUE); super(valueCount, 64);
values = new long[valueCount]; values = new long[valueCount];
} }
public Direct64(DataInput in, int valueCount) throws IOException { Direct64(DataInput in, int valueCount) throws IOException {
super(valueCount, BITS_PER_VALUE); this(valueCount);
long[] values = new long[valueCount]; for (int i = 0; i < valueCount; ++i) {
for(int i=0;i<valueCount;i++) {
values[i] = in.readLong(); values[i] = in.readLong();
} }
this.values = values;
}
/**
* Creates an array backed by the given values.
* </p><p>
* Note: The values are used directly, so changes to the given values will
* affect the structure.
* @param values used as the internal backing array.
*/
public Direct64(long[] values) {
super(values.length, BITS_PER_VALUE);
this.values = values;
} }
@Override
public long get(final int index) { public long get(final int index) {
assert index >= 0 && index < size();
return values[index]; return values[index];
} }
@Override
public int get(int index, long[] arr, int off, int len) {
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
System.arraycopy(values, index, arr, off, gets);
return gets;
}
public void set(final int index, final long value) { public void set(final int index, final long value) {
values[index] = value; values[index] = (value);
}
@Override
public int set(int index, long[] arr, int off, int len) {
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
System.arraycopy(arr, off, values, index, sets);
return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
Arrays.fill(values, fromIndex, toIndex, val);
} }
public long ramBytesUsed() { public long ramBytesUsed() {
@ -99,7 +62,7 @@ class Direct64 extends PackedInts.MutableImpl {
} }
@Override @Override
public long[] getArray() { public Object getArray() {
return values; return values;
} }
@ -108,4 +71,29 @@ class Direct64 extends PackedInts.MutableImpl {
return true; return true;
} }
/**
 * Bulk read: copies up to <code>len</code> consecutive values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>.
 * Values are stored as longs, so this is a plain array copy.
 *
 * @return the number of values copied, which is capped by the number of
 *         values available from <code>index</code> onwards
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  System.arraycopy(values, index, arr, off, count);
  return count;
}
/**
 * Bulk write: stores up to <code>len</code> values taken from
 * <code>arr</code>, starting at <code>off</code>, into consecutive slots
 * beginning at <code>index</code>. Values are stored as longs, so this is
 * a plain array copy.
 *
 * @return the number of values stored, which is capped by the number of
 *         slots available from <code>index</code> onwards
 */
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  System.arraycopy(arr, off, values, index, count);
  return count;
}
/**
 * Sets every slot from <code>fromIndex</code> (inclusive) to
 * <code>toIndex</code> (exclusive) to <code>val</code>. No range check on
 * <code>val</code> is needed since slots are full 64-bit longs.
 */
@Override
public void fill(int fromIndex, int toIndex, long val) {
Arrays.fill(values, fromIndex, toIndex, val);
}
} }

View File

@ -1,3 +1,5 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed; package org.apache.lucene.util.packed;
/* /*
@ -24,63 +26,37 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
/** /**
* Direct wrapping of 8 bit values to a backing array of bytes. * Direct wrapping of 8-bits values to a backing array.
* @lucene.internal * @lucene.internal
*/ */
final class Direct8 extends PackedInts.MutableImpl {
final byte[] values;
class Direct8 extends PackedInts.MutableImpl { Direct8(int valueCount) {
private final byte[] values; super(valueCount, 8);
private static final int BITS_PER_VALUE = 8;
public Direct8(int valueCount) {
super(valueCount, BITS_PER_VALUE);
values = new byte[valueCount]; values = new byte[valueCount];
} }
public Direct8(DataInput in, int valueCount) Direct8(DataInput in, int valueCount) throws IOException {
throws IOException { this(valueCount);
super(valueCount, BITS_PER_VALUE); for (int i = 0; i < valueCount; ++i) {
byte[] values = new byte[valueCount];
for(int i=0;i<valueCount;i++) {
values[i] = in.readByte(); values[i] = in.readByte();
} }
final int mod = valueCount % 8; final int mod = valueCount % 8;
if (mod != 0) { if (mod != 0) {
final int pad = 8-mod; for (int i = mod; i < 8; ++i) {
// round out long
for(int i=0;i<pad;i++) {
in.readByte(); in.readByte();
} }
} }
this.values = values;
}
/**
* Creates an array backed by the given values.
* </p><p>
* Note: The values are used directly, so changes to the given values will
* affect the structure.
* @param values used as the internal backing array.
*/
public Direct8(byte[] values) {
super(values.length, BITS_PER_VALUE);
this.values = values;
}
public long get(final int index) {
assert index >= 0 && index < size();
return 0xFFL & values[index];
}
public void set(final int index, final long value) {
values[index] = (byte)(value & 0xFF);
} }
@Override @Override
public void fill(int fromIndex, int toIndex, long val) { public long get(final int index) {
assert (val & 0xffL) == val; return values[index] & 0xFFL;
Arrays.fill(values, fromIndex, toIndex, (byte) val); }
public void set(final int index, final long value) {
values[index] = (byte) (value);
} }
public long ramBytesUsed() { public long ramBytesUsed() {
@ -88,7 +64,7 @@ class Direct8 extends PackedInts.MutableImpl {
} }
public void clear() { public void clear() {
Arrays.fill(values, (byte)0); Arrays.fill(values, (byte) 0L);
} }
@Override @Override
@ -100,4 +76,35 @@ class Direct8 extends PackedInts.MutableImpl {
public boolean hasArray() { public boolean hasArray() {
return true; return true;
} }
/**
 * Bulk read: copies up to <code>len</code> consecutive values, starting at
 * <code>index</code>, into <code>arr</code> from position <code>off</code>.
 *
 * @return the number of values copied, which is capped by the number of
 *         values available from <code>index</code> onwards
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  for (int k = 0; k < count; ++k) {
    // the mask undoes the sign extension of the stored byte
    arr[off + k] = values[index + k] & 0xFFL;
  }
  return count;
}
/**
 * Bulk write: stores up to <code>len</code> values taken from
 * <code>arr</code>, starting at <code>off</code>, into consecutive slots
 * beginning at <code>index</code>. Each value is narrowed to a byte.
 *
 * @return the number of values stored, which is capped by the number of
 *         slots available from <code>index</code> onwards
 */
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  for (int k = 0; k < count; ++k) {
    values[index + k] = (byte) arr[off + k];
  }
  return count;
}
/**
 * Sets every slot from <code>fromIndex</code> (inclusive) to
 * <code>toIndex</code> (exclusive) to <code>val</code>, which must fit in
 * 8 bits (checked by assertion only).
 */
@Override
public void fill(int fromIndex, int toIndex, long val) {
  assert (val & 0xFFL) == val;
  final byte narrowed = (byte) val;
  Arrays.fill(values, fromIndex, toIndex, narrowed);
}
} }

View File

@ -1,11 +1,7 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed; package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/* /*
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -23,32 +19,38 @@ import org.apache.lucene.util.RamUsageEstimator;
* limitations under the License. * limitations under the License.
*/ */
/** 48 bitsPerValue backed by short[] */ import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
/**
* Packs integers into 3 shorts (48 bits per value).
* @lucene.internal
*/
final class Packed16ThreeBlocks extends PackedInts.MutableImpl { final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
final short[] blocks;
public static final int MAX_SIZE = Integer.MAX_VALUE / 3; public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
private final short[] blocks;
Packed16ThreeBlocks(int valueCount) { Packed16ThreeBlocks(int valueCount) {
super(valueCount, 48); super(valueCount, 48);
if (valueCount > MAX_SIZE) { if (valueCount > MAX_SIZE) {
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded"); throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
} }
this.blocks = new short[3 * valueCount]; blocks = new short[valueCount * 3];
} }
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException { Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
this(valueCount); this(valueCount);
for (int i = 0; i < blocks.length; i++) { for (int i = 0; i < 3 * valueCount; ++i) {
blocks[i] = in.readShort(); blocks[i] = in.readShort();
} }
final int mod = blocks.length % 4; final int mod = blocks.length % 4;
if (mod != 0) { if (mod != 0) {
final int pad = 4 - mod; for (int i = mod; i < 4; ++i) {
// round out long in.readShort();
for (int i = 0; i < pad; i++) {
in.readShort();
} }
} }
} }
@ -56,26 +58,55 @@ final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
@Override @Override
public long get(int index) { public long get(int index) {
final int o = index * 3; final int o = index * 3;
return (blocks[o] & 0xffffL) << 32 | (blocks[o+1] & 0xffffL) << 16 | (blocks[o+2] & 0xffffL); return (blocks[o] & 0xFFFFL) << 32 | (blocks[o+1] & 0xFFFFL) << 16 | (blocks[o+2] & 0xFFFFL);
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
arr[off++] = (blocks[i] & 0xFFFFL) << 32 | (blocks[i+1] & 0xFFFFL) << 16 | (blocks[i+2] & 0xFFFFL);
}
return gets;
} }
@Override @Override
public void set(int index, long value) { public void set(int index, long value) {
final int o = index * 3; final int o = index * 3;
blocks[o] = (short) (value >> 32); blocks[o] = (short) (value >>> 32);
blocks[o+1] = (short) (value >> 16); blocks[o+1] = (short) (value >>> 16);
blocks[o+2] = (short) value; blocks[o+2] = (short) value;
} }
/**
 * Bulk write: stores up to <code>len</code> values taken from
 * <code>arr</code>, starting at <code>off</code>, into consecutive slots
 * beginning at <code>index</code>. Each value occupies three consecutive
 * shorts, most significant 16 bits first.
 *
 * @return the number of values stored, which is capped by the number of
 *         slots available from <code>index</code> onwards
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  assert off + len <= arr.length;

  final int count = Math.min(len, valueCount - index);
  int dest = index * 3;
  for (int k = 0; k < count; ++k) {
    final long v = arr[off + k];
    blocks[dest] = (short) (v >>> 32);
    blocks[dest + 1] = (short) (v >>> 16);
    blocks[dest + 2] = (short) v;
    dest += 3;
  }
  return count;
}
@Override @Override
public void fill(int fromIndex, int toIndex, long val) { public void fill(int fromIndex, int toIndex, long val) {
short block1 = (short) (val >> 32); final short block1 = (short) (val >>> 32);
short block2 = (short) (val >> 16); final short block2 = (short) (val >>> 16);
short block3 = (short) val; final short block3 = (short) val;
for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) { for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
blocks[i++] = block1; blocks[i] = block1;
blocks[i++] = block2; blocks[i+1] = block2;
blocks[i++] = block3; blocks[i+2] = block3;
} }
} }

View File

@ -138,6 +138,49 @@ class Packed64 extends PackedInts.MutableImpl {
& maskRight; & maskRight;
} }
/**
 * Bulk read: copies values starting at <code>index</code> into
 * <code>arr</code> from position <code>off</code>, reading at most
 * <code>len</code> values (capped by the number of values left).
 * <p>
 * The method first reads values one at a time until <code>index</code> is
 * a multiple of <code>op.values()</code>, then hands as many whole
 * iterations as fit in the remaining <code>len</code> to the
 * {@link BulkOperation}. It may therefore return fewer than
 * <code>len</code> values; when it has made no progress by itself it
 * delegates to the superclass implementation.
 *
 * @return the number of values actually read
 */
@Override
public int get(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  len = Math.min(len, valueCount - index);
  assert off + len <= arr.length;

  final int originalIndex = index;
  final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);

  // go to the next block where the value does not span across two blocks
  final int offsetInBlocks = index % op.values();
  if (offsetInBlocks != 0) {
    for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
      arr[off++] = get(index++);
      --len;
    }
    if (len == 0) {
      return index - originalIndex;
    }
  }

  // bulk get
  assert index % op.values() == 0;
  // The bit position index * bitsPerValue can exceed Integer.MAX_VALUE, so
  // it must be shifted as a long and only then narrowed to int. A cast
  // binds tighter than >>>, hence the explicit parentheses around the shift.
  final int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
  assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
  final int iterations = len / op.values();
  op.get(blocks, blockIndex, arr, off, iterations);
  final int gotValues = iterations * op.values();
  index += gotValues;
  len -= gotValues;
  assert len >= 0;

  if (index > originalIndex) {
    // stay at the block boundary
    return index - originalIndex;
  } else {
    // no progress so far => already at a block boundary but no full block to get
    assert index == originalIndex;
    return super.get(index, arr, off, len);
  }
}
@Override @Override
public void set(final int index, final long value) { public void set(final int index, final long value) {
// The abstract index in a contiguous bit stream // The abstract index in a contiguous bit stream
@ -159,6 +202,48 @@ class Packed64 extends PackedInts.MutableImpl {
| (value << (BLOCK_SIZE - endBits)); | (value << (BLOCK_SIZE - endBits));
} }
/**
 * Bulk write: stores values taken from <code>arr</code>, starting at
 * <code>off</code>, into consecutive slots beginning at
 * <code>index</code>, writing at most <code>len</code> values (capped by
 * the number of slots left).
 * <p>
 * The method first writes values one at a time until <code>index</code>
 * is a multiple of <code>op.values()</code>, then hands as many whole
 * iterations as fit in the remaining <code>len</code> to the
 * {@link BulkOperation}. It may therefore store fewer than
 * <code>len</code> values; when it has made no progress by itself it
 * delegates to the superclass implementation.
 *
 * @return the number of values actually written
 */
@Override
public int set(int index, long[] arr, int off, int len) {
  assert len > 0 : "len must be > 0 (got " + len + ")";
  assert index >= 0 && index < valueCount;
  len = Math.min(len, valueCount - index);
  assert off + len <= arr.length;

  final int originalIndex = index;
  final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);

  // go to the next block where the value does not span across two blocks
  final int offsetInBlocks = index % op.values();
  if (offsetInBlocks != 0) {
    for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
      set(index++, arr[off++]);
      --len;
    }
    if (len == 0) {
      return index - originalIndex;
    }
  }

  // bulk set
  assert index % op.values() == 0;
  // The bit position index * bitsPerValue can exceed Integer.MAX_VALUE, so
  // it must be shifted as a long and only then narrowed to int. A cast
  // binds tighter than >>>, hence the explicit parentheses around the shift.
  final int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
  assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
  final int iterations = len / op.values();
  op.set(blocks, blockIndex, arr, off, iterations);
  final int setValues = iterations * op.values();
  index += setValues;
  len -= setValues;
  assert len >= 0;

  if (index > originalIndex) {
    // stay at the block boundary
    return index - originalIndex;
  } else {
    // no progress so far => already at a block boundary but no full block to set
    assert index == originalIndex;
    return super.set(index, arr, off, len);
  }
}
@Override @Override
public String toString() { public String toString() {

View File

@ -39,12 +39,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0; return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
} }
public static float overheadPerValue(int bitsPerValue) {
int valuesPerBlock = 64 / bitsPerValue;
int overhead = 64 % bitsPerValue;
return (float) overhead / valuesPerBlock;
}
private static int requiredCapacity(int valueCount, int valuesPerBlock) { private static int requiredCapacity(int valueCount, int valuesPerBlock) {
return valueCount / valuesPerBlock return valueCount / valuesPerBlock
+ (valueCount % valuesPerBlock == 0 ? 0 : 1); + (valueCount % valuesPerBlock == 0 ? 0 : 1);
@ -92,16 +86,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk get // bulk get
assert index % valuesPerBlock == 0; assert index % valuesPerBlock == 0;
final long readMask = (1L << bitsPerValue) - 1; final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
final int startBlock = index / valuesPerBlock; assert op.blocks() == 1;
final int endBlock = (index + len) / valuesPerBlock; assert op.values() == valuesPerBlock;
final int diff = (endBlock - startBlock) * valuesPerBlock; final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.get(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff; index += diff; len -= diff;
for (int block = startBlock; block < endBlock; ++block) {
for (int i = 0; i < valuesPerBlock; ++i) {
arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
}
}
if (index > originalIndex) { if (index > originalIndex) {
// stay at the block boundary // stay at the block boundary
@ -138,17 +130,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk set // bulk set
assert index % valuesPerBlock == 0; assert index % valuesPerBlock == 0;
final int startBlock = index / valuesPerBlock; final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
final int endBlock = (index + len) / valuesPerBlock; assert op.blocks() == 1;
final int diff = (endBlock - startBlock) * valuesPerBlock; assert op.values() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.set(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff; index += diff; len -= diff;
for (int block = startBlock; block < endBlock; ++block) {
long next = 0L;
for (int i = 0; i < valuesPerBlock; ++i) {
next |= (arr[off++] << (i * bitsPerValue));
}
blocks[block] = next;
}
if (index > originalIndex) { if (index > originalIndex) {
// stay at the block boundary // stay at the block boundary
@ -202,8 +191,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
} }
@Override @Override
protected int getFormat() { protected PackedInts.Format getFormat() {
return PackedInts.PACKED_SINGLE_BLOCK; return PackedInts.Format.PACKED_SINGLE_BLOCK;
} }
@Override @Override

View File

@ -1,87 +0,0 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.packed.PackedInts.ReaderIteratorImpl;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Sequential reader over values packed so that no value spans two 64-bit
 * blocks: each long read from the input holds
 * <code>64 / bitsPerValue</code> values, lowest bits first.
 */
final class Packed64SingleBlockReaderIterator extends ReaderIteratorImpl {

  /** The block currently being decoded. */
  private long pending = 0;
  /** Bit offset of the next value inside {@link #pending}; 64 forces a new block to be read. */
  private int shift = 64;
  /** Mask keeping the lowest <code>bitsPerValue</code> bits. */
  private final long mask;
  /** Ordinal of the last value returned, -1 before the first call. */
  private int position = -1;

  Packed64SingleBlockReaderIterator(int valueCount, int bitsPerValue, IndexInput in) {
    super(valueCount, bitsPerValue, in);
    mask = ~(~0L << bitsPerValue);
  }

  /** Returns the next value, reading a new block from the input when the current one is used up. */
  @Override
  public long next() throws IOException {
    final boolean blockUsedUp = shift + bitsPerValue > 64;
    if (blockUsedUp) {
      pending = in.readLong();
      shift = 0;
    }
    final long value = (pending >>> shift) & mask;
    shift += bitsPerValue;
    position++;
    return value;
  }

  /** Returns the ordinal of the last value returned. */
  @Override
  public int ord() {
    return position;
  }

  /**
   * Moves forward to the value at ordinal <code>ord</code> and returns it.
   * Whole blocks lying strictly between the current one and the target
   * block are skipped by seeking; the remaining values are read and
   * discarded.
   */
  @Override
  public long advance(int ord) throws IOException {
    assert ord < valueCount : "ord must be less than valueCount";
    assert ord > position : "ord must be greater than the current position";

    final int valuesPerBlock = 64 / bitsPerValue;
    // first block that has not been read from the input yet
    final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
    final long targetBlock = ord / valuesPerBlock;
    final long blocksToSkip = targetBlock - nextBlock;

    if (blocksToSkip > 0) {
      // each block is one long, i.e. 8 bytes
      in.seek(in.getFilePointer() + (blocksToSkip << 3));
      // force the next read to load the target block
      shift = 64;
      final int offsetInBlock = ord % valuesPerBlock;
      int consumed = 0;
      while (consumed < offsetInBlock) {
        next();
        ++consumed;
      }
    } else {
      int i = position;
      while (i < ord - 1) {
        next();
        ++i;
      }
    }

    position = ord - 1;
    return next();
  }

}

View File

@ -1,81 +0,0 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.packed.PackedInts.Writer;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * A {@link Writer} producing the stream layout consumed by
 * {@link Packed64SingleBlock} readers: values are accumulated into a 64-bit
 * block which is written out as soon as it cannot hold one more value.
 */
final class Packed64SingleBlockWriter extends Writer {

  /** Block under construction. */
  private long pending = 0;
  /** Bit offset at which the next value is stored in {@link #pending}. */
  private int shift = 0;
  /** Number of values passed to {@link #add(long)} so far. */
  private int written = 0;

  Packed64SingleBlockWriter(DataOutput out, int valueCount,
      int bitsPerValue) throws IOException {
    super(out, valueCount, bitsPerValue);
    assert Packed64SingleBlock.isSupported(bitsPerValue) : bitsPerValue + " is not supported";
  }

  @Override
  protected int getFormat() {
    return PackedInts.PACKED_SINGLE_BLOCK;
  }

  @Override
  public void add(long v) throws IOException {
    assert v >= 0;
    assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
        + " maxValue=" + PackedInts.maxValue(bitsPerValue);

    final boolean blockFull = shift + bitsPerValue > 64;
    if (blockFull) {
      // No room left for a whole value: emit the block and start a new one.
      out.writeLong(pending);
      shift = 0;
      pending = 0;
    }
    pending |= v << shift;
    shift += bitsPerValue;
    written++;
  }

  @Override
  public void finish() throws IOException {
    // Pad with zeros up to the announced value count; add() flushes full blocks itself.
    while (written < valueCount) {
      add(0L);
    }
    // A non-zero shift means the last block holds values that are not on disk yet.
    if (shift > 0) {
      out.writeLong(pending);
    }
  }

  @Override
  public String toString() {
    final String progress = written + "/" + valueCount;
    return "Packed64SingleBlockWriter(written " + progress + " with "
        + bitsPerValue + " bits/value)";
  }

}

View File

@ -1,11 +1,7 @@
// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed; package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/* /*
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -23,32 +19,38 @@ import org.apache.lucene.util.RamUsageEstimator;
* limitations under the License. * limitations under the License.
*/ */
/** 24 bitsPerValue backed by byte[] */ import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
/**
* Packs integers into 3 bytes (24 bits per value).
* @lucene.internal
*/
final class Packed8ThreeBlocks extends PackedInts.MutableImpl { final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
final byte[] blocks;
public static final int MAX_SIZE = Integer.MAX_VALUE / 3; public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
private final byte[] blocks;
Packed8ThreeBlocks(int valueCount) { Packed8ThreeBlocks(int valueCount) {
super(valueCount, 24); super(valueCount, 24);
if (valueCount > MAX_SIZE) { if (valueCount > MAX_SIZE) {
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded"); throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
} }
this.blocks = new byte[3 * valueCount]; blocks = new byte[valueCount * 3];
} }
Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException { Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
this(valueCount); this(valueCount);
for (int i = 0; i < blocks.length; i++) { for (int i = 0; i < 3 * valueCount; ++i) {
blocks[i] = in.readByte(); blocks[i] = in.readByte();
} }
final int mod = blocks.length % 8; final int mod = blocks.length % 8;
if (mod != 0) { if (mod != 0) {
final int pad = 8 - mod; for (int i = mod; i < 8; ++i) {
// round out long in.readByte();
for (int i = 0; i < pad; i++) {
in.readByte();
} }
} }
} }
@ -56,26 +58,55 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
@Override @Override
public long get(int index) { public long get(int index) {
final int o = index * 3; final int o = index * 3;
return (blocks[o] & 0xffL) << 16 | (blocks[o+1] & 0xffL) << 8 | (blocks[o+2] & 0xffL); return (blocks[o] & 0xFFL) << 16 | (blocks[o+1] & 0xFFL) << 8 | (blocks[o+2] & 0xFFL);
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
arr[off++] = (blocks[i] & 0xFFL) << 16 | (blocks[i+1] & 0xFFL) << 8 | (blocks[i+2] & 0xFFL);
}
return gets;
} }
@Override @Override
public void set(int index, long value) { public void set(int index, long value) {
final int o = index * 3; final int o = index * 3;
blocks[o] = (byte) (value >>> 16);
blocks[o+1] = (byte) (value >>> 8);
blocks[o+2] = (byte) value; blocks[o+2] = (byte) value;
blocks[o+1] = (byte) (value >> 8); }
blocks[o] = (byte) (value >> 16);
@Override
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
final long value = arr[i];
blocks[o++] = (byte) (value >>> 16);
blocks[o++] = (byte) (value >>> 8);
blocks[o++] = (byte) value;
}
return sets;
} }
@Override @Override
public void fill(int fromIndex, int toIndex, long val) { public void fill(int fromIndex, int toIndex, long val) {
byte block1 = (byte) (val >> 16); final byte block1 = (byte) (val >>> 16);
byte block2 = (byte) (val >> 8); final byte block2 = (byte) (val >>> 8);
byte block3 = (byte) val; final byte block3 = (byte) val;
for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) { for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
blocks[i++] = block1; blocks[i] = block1;
blocks[i++] = block2; blocks[i+1] = block2;
blocks[i++] = block3; blocks[i+2] = block3;
} }
} }
@ -93,5 +124,4 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ ", size=" + size() + ", elements.length=" + blocks.length + ")"; + ", size=" + size() + ", elements.length=" + blocks.length + ")";
} }
} }

View File

@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.LongsRef;
import java.io.IOException; import java.io.IOException;
@ -34,7 +35,6 @@ import java.io.IOException;
* *
* @lucene.internal * @lucene.internal
*/ */
public class PackedInts { public class PackedInts {
/** /**
@ -62,12 +62,184 @@ public class PackedInts {
*/ */
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
final static String CODEC_NAME = "PackedInts"; public final static String CODEC_NAME = "PackedInts";
final static int VERSION_START = 0; public final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START; public final static int VERSION_CURRENT = VERSION_START;
static final int PACKED = 0; /**
static final int PACKED_SINGLE_BLOCK = 1; * A format to write packed ints.
*
* @lucene.internal
*/
public enum Format {

  /**
   * Compact format, all bits are written contiguously.
   */
  PACKED(0) {

    @Override
    public int nblocks(int bitsPerValue, int values) {
      // total number of bits, rounded up to a whole number of 64-bit blocks
      return (int) Math.ceil((double) values * bitsPerValue / 64);
    }

  },

  /**
   * A format that may insert padding bits to improve encoding and decoding
   * speed. Since this format doesn't support all possible bits per value, you
   * should never use it directly, but rather use
   * {@link PackedInts#fastestFormatAndBits(int, int, float)} to find the
   * format that best suits your needs.
   */
  PACKED_SINGLE_BLOCK(1) {

    @Override
    public int nblocks(int bitsPerValue, int values) {
      // a value never spans two blocks, so each block holds a fixed number of values
      final int valuesPerBlock = 64 / bitsPerValue;
      return (int) Math.ceil((double) values / valuesPerBlock);
    }

    @Override
    public boolean isSupported(int bitsPerValue) {
      return Packed64SingleBlock.isSupported(bitsPerValue);
    }

    @Override
    public float overheadPerValue(int bitsPerValue) {
      assert isSupported(bitsPerValue);
      final int valuesPerBlock = 64 / bitsPerValue;
      // bits left unused at the end of every block, spread over the values of the block
      final int overhead = 64 % bitsPerValue;
      return (float) overhead / valuesPerBlock;
    }

  };

  /**
   * Get a format according to its ID.
   *
   * @param id the ID, as returned by {@link #getId()}
   * @return the format whose ID is <code>id</code>
   * @throws IllegalArgumentException if no format has this ID
   */
  public static Format byId(int id) {
    for (Format format : Format.values()) {
      if (format.getId() == id) {
        return format;
      }
    }
    throw new IllegalArgumentException("Unknown format id: " + id);
  }

  /**
   * The ID of the format, prefer {@link #getId()} to read it. The field is
   * final: enum constants are shared, so the ID that identifies a format in
   * a stream header must not be reassignable.
   */
  public final int id;

  private Format(int id) {
    this.id = id;
  }

  /**
   * Returns the ID of the format.
   */
  public int getId() {
    return id;
  }

  /**
   * Computes how many blocks are needed to store <code>values</code> values
   * of size <code>bitsPerValue</code>.
   */
  public abstract int nblocks(int bitsPerValue, int values);

  /**
   * Tests whether the provided number of bits per value is supported by the
   * format. By default every value between 1 and 64 (inclusive) is supported.
   */
  public boolean isSupported(int bitsPerValue) {
    return bitsPerValue >= 1 && bitsPerValue <= 64;
  }

  /**
   * Returns the overhead per value, in bits. By default there is no overhead.
   */
  public float overheadPerValue(int bitsPerValue) {
    assert isSupported(bitsPerValue);
    return 0f;
  }

  /**
   * Returns the overhead ratio (<code>overhead per value / bits per value</code>).
   */
  public final float overheadRatio(int bitsPerValue) {
    assert isSupported(bitsPerValue);
    return overheadPerValue(bitsPerValue) / bitsPerValue;
  }

}
/**
 * Immutable pair made of a {@link Format} and the number of bits per value
 * to use with it.
 */
public static class FormatAndBits {

  /** The format to use. */
  public final Format format;

  /** The number of bits per value to use with {@link #format}. */
  public final int bitsPerValue;

  /**
   * Sole constructor.
   *
   * @param format       the format
   * @param bitsPerValue the number of bits per value
   */
  public FormatAndBits(Format format, int bitsPerValue) {
    this.bitsPerValue = bitsPerValue;
    this.format = format;
  }

}
/**
 * Try to find the {@link Format} and number of bits per value that would
 * restore from disk the fastest reader whose overhead is less than
 * <code>acceptableOverheadRatio</code>.
 * <p>
 * The <code>acceptableOverheadRatio</code> parameter makes sense for
 * random-access {@link Reader}s. In case you only plan to perform
 * sequential access on this stream later on, you should probably use
 * {@link PackedInts#COMPACT}.
 * <p>
 * If you don't know how many values you are going to write, use
 * <code>valueCount = -1</code>.
 *
 * @param valueCount              the number of values, or -1 if unknown
 * @param bitsPerValue            the minimum number of bits required per value
 * @param acceptableOverheadRatio the acceptable overhead ratio, clamped into
 *                                [{@link PackedInts#COMPACT}, {@link PackedInts#FASTEST}]
 * @return the selected format and the number of bits per value to use with it
 */
public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
  // An unknown number of values is handled like the largest possible one.
  final int count = valueCount == -1 ? Integer.MAX_VALUE : valueCount;

  final float ratio = Math.min(FASTEST, Math.max(COMPACT, acceptableOverheadRatio));
  final float overheadBudget = ratio * bitsPerValue; // in bits
  final int widestAllowed = bitsPerValue + (int) overheadBudget;

  // Candidate widths are probed in a fixed order of preference; the first one
  // that is both large enough and within the budget wins.
  if (bitsPerValue <= 8 && widestAllowed >= 8) {
    return new FormatAndBits(Format.PACKED, 8);
  }
  if (bitsPerValue <= 16 && widestAllowed >= 16) {
    return new FormatAndBits(Format.PACKED, 16);
  }
  if (bitsPerValue <= 32 && widestAllowed >= 32) {
    return new FormatAndBits(Format.PACKED, 32);
  }
  if (bitsPerValue <= 64 && widestAllowed >= 64) {
    return new FormatAndBits(Format.PACKED, 64);
  }
  // The three-blocks implementations can only hold a limited number of values.
  if (count <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && widestAllowed >= 24) {
    return new FormatAndBits(Format.PACKED, 24);
  }
  if (count <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && widestAllowed >= 48) {
    return new FormatAndBits(Format.PACKED, 48);
  }

  // Otherwise look for the narrowest single-block width that fits the budget.
  for (int bpv = bitsPerValue; bpv <= widestAllowed; ++bpv) {
    if (!Format.PACKED_SINGLE_BLOCK.isSupported(bpv)) {
      continue;
    }
    final float paddingOverhead = Format.PACKED_SINGLE_BLOCK.overheadPerValue(bpv);
    // the extra bits spent by widening values to bpv are taken out of the budget
    final float remainingBudget = overheadBudget + bitsPerValue - bpv;
    if (paddingOverhead <= remainingBudget) {
      return new FormatAndBits(Format.PACKED_SINGLE_BLOCK, bpv);
    }
  }

  // Nothing faster is affordable: fall back to the compact layout.
  return new FormatAndBits(Format.PACKED, bitsPerValue);
}
/** /**
* A read-only random access array of positive integers. * A read-only random access array of positive integers.
@ -132,30 +304,39 @@ public class PackedInts {
public static interface ReaderIterator extends Closeable { public static interface ReaderIterator extends Closeable {
/** Returns next value */ /** Returns next value */
long next() throws IOException; long next() throws IOException;
/** Returns at least 1 and at most <code>count</code> next values,
* the returned ref MUST NOT be modified */
LongsRef next(int count) throws IOException;
/** Returns number of bits per value */ /** Returns number of bits per value */
int getBitsPerValue(); int getBitsPerValue();
/** Returns number of values */ /** Returns number of values */
int size(); int size();
/** Returns the current position */ /** Returns the current position */
int ord(); int ord();
/** Skips to the given ordinal and returns its value.
* @return the value at the given position
* @throws IOException if reading the value throws an IOException*/
long advance(int ord) throws IOException;
} }
static abstract class ReaderIteratorImpl implements ReaderIterator { static abstract class ReaderIteratorImpl implements ReaderIterator {
protected final IndexInput in; protected final DataInput in;
protected final int bitsPerValue; protected final int bitsPerValue;
protected final int valueCount; protected final int valueCount;
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) { protected ReaderIteratorImpl(int valueCount, int bitsPerValue, DataInput in) {
this.in = in; this.in = in;
this.bitsPerValue = bitsPerValue; this.bitsPerValue = bitsPerValue;
this.valueCount = valueCount; this.valueCount = valueCount;
} }
@Override
public long next() throws IOException {
  // Delegate to the bulk variant, asking for a single value.
  final LongsRef ref = next(1);
  assert ref.length > 0;
  // Take the first value and move the ref's window one position forward.
  final long value = ref.longs[ref.offset++];
  ref.length--;
  return value;
}
@Override @Override
public int getBitsPerValue() { public int getBitsPerValue() {
return bitsPerValue; return bitsPerValue;
@ -168,7 +349,9 @@ public class PackedInts {
@Override @Override
public void close() throws IOException { public void close() throws IOException {
in.close(); if (in instanceof Closeable) {
((Closeable) in).close();
}
} }
} }
@ -217,7 +400,7 @@ public class PackedInts {
* A simple base for Readers that keeps track of valueCount and bitsPerValue. * A simple base for Readers that keeps track of valueCount and bitsPerValue.
* @lucene.internal * @lucene.internal
*/ */
public static abstract class ReaderImpl implements Reader { static abstract class ReaderImpl implements Reader {
protected final int bitsPerValue; protected final int bitsPerValue;
protected final int valueCount; protected final int valueCount;
@ -257,7 +440,7 @@ public class PackedInts {
} }
public static abstract class MutableImpl extends ReaderImpl implements Mutable { static abstract class MutableImpl extends ReaderImpl implements Mutable {
protected MutableImpl(int valueCount, int bitsPerValue) { protected MutableImpl(int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue); super(valueCount, bitsPerValue);
@ -283,13 +466,15 @@ public class PackedInts {
} }
} }
protected int getFormat() { protected Format getFormat() {
return PACKED; return Format.PACKED;
} }
@Override @Override
public void save(DataOutput out) throws IOException { public void save(DataOutput out) throws IOException {
Writer writer = getWriterByFormat(out, valueCount, bitsPerValue, getFormat()); Writer writer = getWriterNoHeader(out, getFormat(),
valueCount, bitsPerValue, DEFAULT_BUFFER_SIZE);
writer.writeHeader();
for (int i = 0; i < valueCount; ++i) { for (int i = 0; i < valueCount; ++i) {
writer.add(get(i)); writer.add(get(i));
} }
@ -302,121 +487,209 @@ public class PackedInts {
*/ */
public static abstract class Writer { public static abstract class Writer {
protected final DataOutput out; protected final DataOutput out;
protected final int bitsPerValue;
protected final int valueCount; protected final int valueCount;
protected final int bitsPerValue;
protected Writer(DataOutput out, int valueCount, int bitsPerValue) protected Writer(DataOutput out, int valueCount, int bitsPerValue)
throws IOException { throws IOException {
assert bitsPerValue <= 64; assert bitsPerValue <= 64;
assert valueCount >= 0 || valueCount == -1;
this.out = out; this.out = out;
this.valueCount = valueCount; this.valueCount = valueCount;
this.bitsPerValue = bitsPerValue; this.bitsPerValue = bitsPerValue;
}
void writeHeader() throws IOException {
assert valueCount != -1;
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(bitsPerValue); out.writeVInt(bitsPerValue);
out.writeVInt(valueCount); out.writeVInt(valueCount);
out.writeVInt(getFormat()); out.writeVInt(getFormat().getId());
} }
protected abstract int getFormat(); /** The format used to serialize values. */
protected abstract PackedInts.Format getFormat();
/** Add a value to the stream. */
public abstract void add(long v) throws IOException; public abstract void add(long v) throws IOException;
/**
 * Returns the number of bits per value this writer was constructed with.
 */
public final int bitsPerValue() {
return bitsPerValue;
}
/** Perform end-of-stream operations. */
public abstract void finish() throws IOException; public abstract void finish() throws IOException;
/**
* Returns the current ord in the stream (number of values that have been
* written so far minus one).
*/
public abstract int ord();
} }
/** /**
* Retrieve PackedInt data from the DataInput and return a packed int * Expert: Restore a {@link Reader} from a stream without reading metadata at
* structure based on it. * the beginning of the stream. This method is useful to restore data from
* streams which have been created using
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
* *
* @param in positioned at the beginning of a stored packed int structure. * @param in the stream to read data from, positioned at the beginning of the packed values
* @return a read only random access capable array of positive integers. * @param format the format used to serialize
* @throws IOException if the structure could not be retrieved. * @param version the version used to serialize the data
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @return a Reader
* @throws IOException
* @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
* @lucene.internal * @lucene.internal
*/ */
public static Reader getReader(DataInput in) throws IOException { public static Reader getReaderNoHeader(DataInput in, Format format, int version,
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START); int valueCount, int bitsPerValue) throws IOException {
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
switch (format) { switch (format) {
case PACKED_SINGLE_BLOCK:
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
case PACKED: case PACKED:
switch (bitsPerValue) { switch (bitsPerValue) {
case 8: case 8:
return new Direct8(in, valueCount); return new Direct8(in, valueCount);
case 16: case 16:
return new Direct16(in, valueCount); return new Direct16(in, valueCount);
case 24:
return new Packed8ThreeBlocks(in, valueCount);
case 32: case 32:
return new Direct32(in, valueCount); return new Direct32(in, valueCount);
case 48:
return new Packed16ThreeBlocks(in, valueCount);
case 64: case 64:
return new Direct64(in, valueCount); return new Direct64(in, valueCount);
default: case 24:
return new Packed64(in, valueCount, bitsPerValue); if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
return new Packed8ThreeBlocks(in, valueCount);
}
break;
case 48:
if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
return new Packed16ThreeBlocks(in, valueCount);
}
break;
} }
case PACKED_SINGLE_BLOCK: return new Packed64(in, valueCount, bitsPerValue);
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
default: default:
throw new AssertionError("Unknwown Writer format: " + format); throw new AssertionError("Unknwown Writer format: " + format);
} }
} }
/**
 * Restore a {@link Reader} from a stream. The stream must start with the
 * header written by {@link Writer#writeHeader()}: codec header, number of
 * bits per value, number of values and format ID, in this order.
 *
 * @param in the stream to read data from
 * @return a Reader
 * @throws IOException if the header or the values cannot be read
 * @throws IllegalArgumentException if the stored format ID is unknown
 * @lucene.internal
 */
public static Reader getReader(DataInput in) throws IOException {
  // Accept every version the writer may produce: it always writes VERSION_CURRENT.
  final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
  final int bitsPerValue = in.readVInt();
  assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
  final int valueCount = in.readVInt();
  final Format format = Format.byId(in.readVInt());
  return getReaderNoHeader(in, format, version, valueCount, bitsPerValue);
}
/**
 * Expert: Restore a {@link ReaderIterator} from a stream without reading
 * metadata at the beginning of the stream. This method is useful to restore
 * data from streams which have been created using
 * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
 * <p>
 * Every format is currently served by the same iterator implementation,
 * which receives the format as an argument.
 *
 * @param in the stream to read data from, positioned at the beginning of the packed values
 * @param format the format used to serialize
 * @param version the version used to serialize the data (currently not forwarded to the iterator)
 * @param valueCount how many values the stream holds
 * @param bitsPerValue the number of bits per value
 * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
 * @return a ReaderIterator
 * @throws IOException declared for callers; the visible body only constructs the iterator
 * @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
 * @lucene.internal
 */
public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version,
int valueCount, int bitsPerValue, int mem) throws IOException {
return new PackedReaderIterator(format, valueCount, bitsPerValue, in, mem);
}
/** /**
* Retrieve PackedInts as a {@link ReaderIterator} * Retrieve PackedInts as a {@link ReaderIterator}
* @param in positioned at the beginning of a stored packed int structure. * @param in positioned at the beginning of a stored packed int structure.
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
* @return an iterator to access the values * @return an iterator to access the values
* @throws IOException if the structure could not be retrieved. * @throws IOException if the structure could not be retrieved.
* @lucene.internal * @lucene.internal
*/ */
public static ReaderIterator getReaderIterator(IndexInput in) throws IOException { public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START); final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt(); final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt(); final int valueCount = in.readVInt();
final int format = in.readVInt(); final Format format = Format.byId(in.readVInt());
switch (format) { return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem);
case PACKED:
return new PackedReaderIterator(valueCount, bitsPerValue, in);
case PACKED_SINGLE_BLOCK:
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
default:
throw new AssertionError("Unknwown Writer format: " + format);
}
} }
/** /**
* Retrieve PackedInts.Reader that does not load values * Expert: Construct a direct {@link Reader} from a stream without reading
* into RAM but rather accesses all values via the * metadata at the beginning of the stream. This method is useful to restore
* provided IndexInput. * data from streams which have been created using
* @param in positioned at the beginning of a stored packed int structure. * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
* @return an Reader to access the values * </p><p>
* @throws IOException if the structure could not be retrieved. * The returned reader will have very little memory overhead, but every call
* to {@link Reader#get(int)} is likely to perform a disk seek.
*
* @param in the stream to read data from
* @param format the format used to serialize
* @param version the version used to serialize the data
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @return a direct Reader
* @throws IOException
* @lucene.internal * @lucene.internal
*/ */
public static Reader getDirectReader(IndexInput in) throws IOException { public static Reader getDirectReaderNoHeader(IndexInput in, Format format,
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START); int version, int valueCount, int bitsPerValue) throws IOException {
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
switch (format) { switch (format) {
case PACKED: case PACKED:
return new DirectPackedReader(bitsPerValue, valueCount, in); return new DirectPackedReader(bitsPerValue, valueCount, in);
case PACKED_SINGLE_BLOCK: case PACKED_SINGLE_BLOCK:
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in); return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default: default:
throw new AssertionError("Unknwown Writer format: " + format); throw new AssertionError("Unknwown format: " + format);
} }
} }
/**
 * Construct a direct {@link Reader} from an {@link IndexInput}. This method
 * is useful to restore data from streams which have been created using
 * {@link PackedInts#getWriter(DataOutput, int, int, float)}.
 * <p>
 * The returned reader will have very little memory overhead, but every call
 * to {@link Reader#get(int)} is likely to perform a disk seek.
 *
 * @param in the stream to read data from, positioned at the beginning of the header
 * @return a direct Reader
 * @throws IOException if the header cannot be read
 * @throws IllegalArgumentException if the stored format ID is unknown
 * @lucene.internal
 */
public static Reader getDirectReader(IndexInput in) throws IOException {
  // Accept every version the writer may produce: it always writes VERSION_CURRENT.
  final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
  final int bitsPerValue = in.readVInt();
  assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
  final int valueCount = in.readVInt();
  final Format format = Format.byId(in.readVInt());
  return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue);
}
/** /**
* Create a packed integer array with the given amount of values initialized * Create a packed integer array with the given amount of values initialized
* to 0. the valueCount and the bitsPerValue cannot be changed after creation. * to 0. the valueCount and the bitsPerValue cannot be changed after creation.
* All Mutables known by this factory are kept fully in RAM. * All Mutables known by this factory are kept fully in RAM.
* * </p><p>
* Positive values of <code>acceptableOverheadRatio</code> will trade space * Positive values of <code>acceptableOverheadRatio</code> will trade space
* for speed by selecting a faster but potentially less memory-efficient * for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of * implementation. An <code>acceptableOverheadRatio</code> of
@ -433,103 +706,130 @@ public class PackedInts {
*/ */
public static Mutable getMutable(int valueCount, public static Mutable getMutable(int valueCount,
int bitsPerValue, float acceptableOverheadRatio) { int bitsPerValue, float acceptableOverheadRatio) {
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio); assert valueCount >= 0;
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue; final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
switch (formatAndBits.format) {
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) { case PACKED_SINGLE_BLOCK:
return new Direct8(valueCount); return Packed64SingleBlock.create(valueCount, formatAndBits.bitsPerValue);
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) { case PACKED:
return new Direct16(valueCount); switch (formatAndBits.bitsPerValue) {
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) { case 8:
return new Direct32(valueCount); return new Direct8(valueCount);
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) { case 16:
return new Direct64(valueCount); return new Direct16(valueCount);
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) { case 32:
return new Packed8ThreeBlocks(valueCount); return new Direct32(valueCount);
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) { case 64:
return new Packed16ThreeBlocks(valueCount); return new Direct64(valueCount);
} else { case 24:
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) { if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
if (Packed64SingleBlock.isSupported(bpv)) { return new Packed8ThreeBlocks(valueCount);
float overhead = Packed64SingleBlock.overheadPerValue(bpv); }
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv; break;
if (overhead <= acceptableOverhead) { case 48:
return Packed64SingleBlock.create(valueCount, bpv); if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
} return new Packed16ThreeBlocks(valueCount);
}
break;
} }
} return new Packed64(valueCount, formatAndBits.bitsPerValue);
return new Packed64(valueCount, bitsPerValue); default:
throw new AssertionError();
} }
} }
/** /**
* Create a packed integer array writer for the given number of values at the * Expert: Create a packed integer array writer for the given output, format,
* given bits/value. Writers append to the given IndexOutput and has very * value count, and number of bits per value.
* low memory overhead. * </p><p>
* The resulting stream will be long-aligned. This means that depending on
* the format which is used, up to 63 bits will be wasted. An easy way to
* make sure that no space is lost is to always use a <code>valueCount</code>
* that is a multiple of 64.
* </p><p>
* This method does not write any metadata to the stream, meaning that it is
* your responsibility to store it somewhere else in order to be able to
* recover data from the stream later on:
* <ul>
* <li><code>format</code> (using {@link Format#getId()}),</li>
* <li><code>valueCount</code>,</li>
* <li><code>bitsPerValue</code>,</li>
* <li>{@link #VERSION_CURRENT}.</li>
* </ul>
* </p><p>
* It is possible to start writing values without knowing how many of them you
* are actually going to write. To do this, just pass <code>-1</code> as
* <code>valueCount</code>. On the other hand, for any positive value of
* <code>valueCount</code>, the returned writer will make sure that you don't
* write more values than expected and pad the end of stream with zeros in
* case you have written less than <code>valueCount</code> when calling
* {@link Writer#finish()}.
* </p><p>
* The <code>mem</code> parameter lets you control how much memory can be used
* to buffer changes in memory before flushing to disk. High values of
* <code>mem</code> are likely to improve throughput. On the other hand, if
* speed is not that important to you, a value of <code>0</code> will use as
* little memory as possible and should already offer reasonable throughput.
* *
* Positive values of <code>acceptableOverheadRatio</code> will trade space * @param out the data output
* @param format the format to use to serialize the values
* @param valueCount the number of values
* @param bitsPerValue the number of bits per value
* @param mem how much memory (in bytes) can be used to speed up serialization
* @return a Writer
* @throws IOException
* @see PackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int)
* @see PackedInts#getReaderNoHeader(DataInput, Format, int, int, int)
* @lucene.internal
*/
public static Writer getWriterNoHeader(
DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) throws IOException {
return new PackedWriter(format, out, valueCount, bitsPerValue, mem);
}
/**
* Create a packed integer array writer for the given output, format, value
* count, and number of bits per value.
* </p><p>
* The resulting stream will be long-aligned. This means that depending on
* the format which is used under the hood, up to 63 bits will be wasted.
* An easy way to make sure that no space is lost is to always use a
* <code>valueCount</code> that is a multiple of 64.
* </p><p>
* This method writes metadata to the stream, so that the resulting stream is
* sufficient to restore a {@link Reader} from it. You don't need to track
* <code>valueCount</code> or <code>bitsPerValue</code> by yourself. In case
* this is a problem, you should probably look at
* {@link #getWriterNoHeader(DataOutput, Format, int, int, int)}.
* </p><p>
* The <code>acceptableOverheadRatio</code> parameter controls how
* readers that will be restored from this stream trade space
* for speed by selecting a faster but potentially less memory-efficient * for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of * implementation. An <code>acceptableOverheadRatio</code> of
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient * {@link PackedInts#COMPACT} will make sure that the most memory-efficient
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure * implementation is selected whereas {@link PackedInts#FASTEST} will make sure
* that the fastest implementation is selected. * that the fastest implementation is selected. In case you are only interested
* in reading this stream sequentially later on, you should probably use
* {@link PackedInts#COMPACT}.
* *
* @param out the destination for the produced bits. * @param out the data output
* @param valueCount the number of elements. * @param valueCount the number of values
* @param bitsPerValue the number of bits available for any given value. * @param bitsPerValue the number of bits per value
* @param acceptableOverheadRatio an acceptable overhead ratio per value * @param acceptableOverheadRatio an acceptable overhead ratio per value
* @return a Writer ready for receiving values. * @return a Writer
* @throws IOException if bits could not be written to out. * @throws IOException
* @lucene.internal * @lucene.internal
*/ */
public static Writer getWriter(DataOutput out, public static Writer getWriter(DataOutput out,
int valueCount, int bitsPerValue, float acceptableOverheadRatio) int valueCount, int bitsPerValue, float acceptableOverheadRatio)
throws IOException { throws IOException {
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio); assert valueCount >= 0;
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue; final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
final Writer writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE);
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) { writer.writeHeader();
return getWriterByFormat(out, valueCount, 8, PACKED); return writer;
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
return getWriterByFormat(out, valueCount, 16, PACKED);
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
return getWriterByFormat(out, valueCount, 32, PACKED);
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
return getWriterByFormat(out, valueCount, 64, PACKED);
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
return getWriterByFormat(out, valueCount, 24, PACKED);
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
return getWriterByFormat(out, valueCount, 48, PACKED);
} else {
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
if (overhead <= acceptableOverhead) {
return getWriterByFormat(out, valueCount, bpv, PACKED_SINGLE_BLOCK);
}
}
}
return getWriterByFormat(out, valueCount, bitsPerValue, PACKED);
}
}
private static Writer getWriterByFormat(DataOutput out,
int valueCount, int bitsPerValue, int format) throws IOException {
switch (format) {
case PACKED:
return new PackedWriter(out, valueCount, bitsPerValue);
case PACKED_SINGLE_BLOCK:
return new Packed64SingleBlockWriter(out, valueCount, bitsPerValue);
default:
throw new IllegalArgumentException("Unknown format " + format);
}
} }
/** Returns how many bits are required to hold values up /** Returns how many bits are required to hold values up

View File

@ -17,76 +17,71 @@ package org.apache.lucene.util.packed;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.store.IndexInput; import java.io.EOFException;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.LongsRef;
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl { final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
private long pending;
private int pendingBitsLeft;
private int position = -1;
// masks[n-1] masks for bottom n bits final PackedInts.Format format;
private final long[] masks; final BulkOperation bulkOperation;
final long[] nextBlocks;
final LongsRef nextValues;
final int iterations;
int position;
public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in) { PackedReaderIterator(PackedInts.Format format, int valueCount, int bitsPerValue, DataInput in, int mem) {
super(valueCount, bitsPerValue, in); super(valueCount, bitsPerValue, in);
this.format = format;
masks = new long[bitsPerValue]; bulkOperation = BulkOperation.of(format, bitsPerValue);
iterations = bulkOperation.computeIterations(valueCount, mem);
long v = 1; assert iterations > 0;
for (int i = 0; i < bitsPerValue; i++) { nextBlocks = new long[iterations * bulkOperation.blocks()];
v *= 2; nextValues = new LongsRef(new long[iterations * bulkOperation.values()], 0, 0);
masks[i] = v - 1; assert iterations * bulkOperation.values() == nextValues.longs.length;
} assert iterations * bulkOperation.blocks() == nextBlocks.length;
nextValues.offset = nextValues.longs.length;
position = -1;
} }
public long next() throws IOException { @Override
if (pendingBitsLeft == 0) { public LongsRef next(int count) throws IOException {
pending = in.readLong(); assert nextValues.length >= 0;
pendingBitsLeft = 64; assert count > 0;
assert nextValues.offset + nextValues.length <= nextValues.longs.length;
nextValues.offset += nextValues.length;
final int remaining = valueCount - position - 1;
if (remaining <= 0) {
throw new EOFException();
}
count = Math.min(remaining, count);
if (nextValues.offset == nextValues.longs.length) {
final int remainingBlocks = format.nblocks(bitsPerValue, remaining);
final int blocksToRead = Math.min(remainingBlocks, nextBlocks.length);
for (int i = 0; i < blocksToRead; ++i) {
nextBlocks[i] = in.readLong();
}
for (int i = blocksToRead; i < nextBlocks.length; ++i) {
nextBlocks[i] = 0L;
}
bulkOperation.get(nextBlocks, 0, nextValues.longs, 0, iterations);
nextValues.offset = 0;
} }
final long result; nextValues.length = Math.min(nextValues.longs.length - nextValues.offset, count);
if (pendingBitsLeft >= bitsPerValue) { // not split position += nextValues.length;
result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1]; return nextValues;
pendingBitsLeft -= bitsPerValue;
} else { // split
final int bits1 = bitsPerValue - pendingBitsLeft;
final long result1 = (pending & masks[pendingBitsLeft-1]) << bits1;
pending = in.readLong();
final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
result = result1 | result2;
}
++position;
return result;
} }
@Override
public int ord() { public int ord() {
return position; return position;
} }
public long advance(final int ord) throws IOException{
assert ord < valueCount : "ord must be less than valueCount";
assert ord > position : "ord must be greater than the current position";
final long bits = (long) bitsPerValue;
final int posToSkip = ord - 1 - position;
final long bitsToSkip = (bits * (long)posToSkip);
if (bitsToSkip < pendingBitsLeft) { // enough bits left - no seek required
pendingBitsLeft -= bitsToSkip;
} else {
final long skip = bitsToSkip-pendingBitsLeft;
final long closestByte = (skip >> 6) << 3;
if (closestByte != 0) { // need to seek
final long filePointer = in.getFilePointer();
in.seek(filePointer + closestByte);
}
pending = in.readLong();
pendingBitsLeft = 64 - (int)(skip % 64);
}
position = ord-1;
return next();
}
} }

View File

@ -19,101 +19,79 @@ package org.apache.lucene.util.packed;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import java.io.EOFException;
import java.io.IOException; import java.io.IOException;
// Packs high order byte first, to match // Packs high order byte first, to match
// IndexOutput.writeInt/Long/Short byte order // IndexOutput.writeInt/Long/Short byte order
/** final class PackedWriter extends PackedInts.Writer {
* Generic writer for space-optimal packed values. The resulting bits can be
* used directly by Packed32, Packed64 and PackedDirect* and will always be
* long-aligned.
*/
class PackedWriter extends PackedInts.Writer { boolean finished;
private long pending; final PackedInts.Format format;
private int pendingBitPos; final BulkOperation bulkOperation;
final long[] nextBlocks;
final long[] nextValues;
final int iterations;
int off;
int written;
// masks[n-1] masks for bottom n bits PackedWriter(PackedInts.Format format, DataOutput out, int valueCount, int bitsPerValue, int mem)
private final long[] masks; throws IOException {
private int written = 0;
public PackedWriter(DataOutput out, int valueCount, int bitsPerValue)
throws IOException {
super(out, valueCount, bitsPerValue); super(out, valueCount, bitsPerValue);
this.format = format;
pendingBitPos = 64; bulkOperation = BulkOperation.of(format, bitsPerValue);
masks = new long[bitsPerValue - 1]; iterations = bulkOperation.computeIterations(valueCount, mem);
nextBlocks = new long[iterations * bulkOperation.blocks()];
long v = 1; nextValues = new long[iterations * bulkOperation.values()];
for (int i = 0; i < bitsPerValue - 1; i++) { off = 0;
v *= 2; written = 0;
masks[i] = v - 1; finished = false;
}
} }
@Override @Override
protected int getFormat() { protected PackedInts.Format getFormat() {
return PackedInts.PACKED; return format;
} }
/**
* Do not call this after finish
*/
@Override @Override
public void add(long v) throws IOException { public void add(long v) throws IOException {
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v assert v >= 0 && v <= PackedInts.maxValue(bitsPerValue);
+ " maxValue=" + PackedInts.maxValue(bitsPerValue); assert !finished;
assert v >= 0; if (valueCount != -1 && written >= valueCount) {
//System.out.println(" packedw add v=" + v + " pendingBitPos=" + pendingBitPos); throw new EOFException("Writing past end of stream");
// TODO
if (pendingBitPos >= bitsPerValue) {
// not split
// write-once, so we can |= w/o first masking to 0s
pending |= v << (pendingBitPos - bitsPerValue);
if (pendingBitPos == bitsPerValue) {
// flush
out.writeLong(pending);
pending = 0;
pendingBitPos = 64;
} else {
pendingBitPos -= bitsPerValue;
}
} else {
// split
// write top pendingBitPos bits of value into bottom bits of pending
pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1];
//System.out.println(" part1 (v >> " + (bitsPerValue - pendingBitPos) + ") & " + masks[pendingBitPos-1]);
// flush
out.writeLong(pending);
// write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending
pendingBitPos = 64 - bitsPerValue + pendingBitPos;
//System.out.println(" part2 v << " + pendingBitPos);
pending = (v << pendingBitPos);
} }
written++; nextValues[off++] = v;
if (off == nextValues.length) {
flush(nextValues.length);
off = 0;
}
++written;
} }
@Override @Override
public void finish() throws IOException { public void finish() throws IOException {
while (written < valueCount) { assert !finished;
add(0L); // Auto flush if (valueCount != -1) {
while (written < valueCount) {
add(0L);
}
} }
flush(off);
finished = true;
}
if (pendingBitPos != 64) { private void flush(int nvalues) throws IOException {
out.writeLong(pending); bulkOperation.set(nextBlocks, 0, nextValues, 0, iterations);
final int blocks = format.nblocks(bitsPerValue, nvalues);
for (int i = 0; i < blocks; ++i) {
out.writeLong(nextBlocks[i]);
} }
off = 0;
} }
@Override @Override
public String toString() { public int ord() {
return "PackedWriter(written " + written + "/" + valueCount + " with " return written - 1;
+ bitsPerValue + " bits/value)";
} }
} }

View File

@ -0,0 +1,264 @@
#! /usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from fractions import gcd
"""Code generation for bulk operations"""
# Bits-per-value settings for which a Packed64SingleBlockBulkOperation<bpv>
# class is generated and registered under Format.PACKED_SINGLE_BLOCK.
PACKED_64_SINGLE_BLOCK_BPV = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
# Name of the Java file this script writes when run as a program.
OUTPUT_FILE = "BulkOperation.java"
# Java preamble of the generated BulkOperation.java: license, imports, and the
# abstract BulkOperation base class (registry lookup, buffer sizing, and the
# abstract blocks()/values()/get()/set() contract). It is written verbatim,
# without any %-formatting, so it may contain literal '%' characters.
#
# computeIterations() sizes the buffers of the sequential reader/writer. It
# compares against values() (not blocks()) because valueCount counts values,
# treats a negative valueCount as "unknown", and never returns less than 1 so
# that callers always get non-empty buffers.
HEADER = """// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.EnumMap;
/**
* Efficient sequential read/write of packed integers.
*/
abstract class BulkOperation {
static final EnumMap<PackedInts.Format, BulkOperation[]> BULK_OPERATIONS = new EnumMap<PackedInts.Format, BulkOperation[]>(PackedInts.Format.class);
public static BulkOperation of(PackedInts.Format format, int bitsPerValue) {
assert bitsPerValue > 0 && bitsPerValue <= 64;
BulkOperation[] ops = BULK_OPERATIONS.get(format);
if (ops == null || ops[bitsPerValue] == null) {
throw new IllegalArgumentException("format: " + format + ", bitsPerValue: " + bitsPerValue);
}
return ops[bitsPerValue];
}
/**
* For every number of bits per value, there is a minimum number of
* blocks (b) / values (v) you need to write in order to reach the next block
* boundary:
* - 16 bits per value -> b=1, v=4
* - 24 bits per value -> b=3, v=8
* - 50 bits per value -> b=25, v=32
* - 63 bits per value -> b=63, v = 64
* - ...
*
* A bulk read consists in copying <code>iterations*v</code> values that are
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
* (higher values of <code>iterations</code> are likely to yield a better
* throughput) => this requires n * (b + v) longs in memory.
*
* This method computes <code>iterations</code> as
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
*
* A negative <code>valueCount</code> means that the number of values is not
* known in advance; the result is always at least 1.
*/
public final int computeIterations(int valueCount, int ramBudget) {
final int iterations = (ramBudget >>> 3) / (blocks() + values());
if (iterations == 0) {
// at least 1
return 1;
} else if (valueCount >= 0 && (iterations - 1) * values() >= valueCount) {
// don't allocate for more than the size of the reader, but never less than 1
return Math.max(1, (int) Math.ceil((double) valueCount / values()));
} else {
return iterations;
}
}
/**
* The minimum number of blocks required to perform a bulk get/set.
*/
public abstract int blocks();
/**
* The number of values that can be stored in <code>blocks()</code> blocks.
*/
public abstract int values();
/**
* Get <code>n * values()</code> values from <code>n * blocks()</code> blocks.
*/
public abstract void get(long[] blocks, int blockIndex, long[] values, int valuesIndex, int iterations);
/**
* Set <code>n * values()</code> values into <code>n * blocks()</code> blocks.
*/
public abstract void set(long[] blocks, int blockIndex, long[] values, int valuesIndex, int iterations);
"""
# Closes the BulkOperation class opened in HEADER.
FOOTER = "}"
def packed64singleblock(bpv, f):
  """Write the Java source of Packed64SingleBlockBulkOperation<bpv> to f.

  In this layout every 64-bit block holds 64 // bpv values, value i occupying
  bits [i * bpv, (i + 1) * bpv) counted from the least significant bit, so the
  generated class always reports blocks() == 1.

  bpv -- number of bits per value (an int; callers pass the entries of
         PACKED_64_SINGLE_BLOCK_BPV)
  f   -- writable text file object receiving the generated Java code
  """
  # Floor division keeps this an int under both Python 2 and Python 3.
  values = 64 // bpv
  f.write("\n static final class Packed64SingleBlockBulkOperation%d extends BulkOperation {\n\n" %bpv)
  f.write(" public int blocks() {\n")
  f.write(" return 1;\n")
  f.write(" }\n\n")
  f.write(" public int values() {\n")
  f.write(" return %d;\n" %values)
  f.write(" }\n\n")

  # get(): unrolled extraction of every value of a block.
  f.write(" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  f.write(" final long block = blocks[bi++];\n")
  mask = (1 << bpv) - 1
  for i in range(values):
    if i == 0:
      # lowest value: no shift needed
      f.write(" values[vi++] = block & %dL;\n" %mask)
    elif i == values - 1:
      # highest value: emitted without a mask
      f.write(" values[vi++] = block >>> %d;\n" %(i * bpv))
    else:
      f.write(" values[vi++] = (block >>> %d) & %dL;\n" %(i * bpv, mask))
  f.write(" }\n")
  f.write(" }\n\n")

  # set(): a single Java statement OR-ing all values of a block together.
  f.write(" public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  for i in range(values):
    if i == 0:
      f.write(" blocks[bi++] = values[vi++]")
    else:
      f.write(" | (values[vi++] << %d)" %(i * bpv))
    if i == values - 1:
      f.write(";\n")
  f.write(" }\n")
  f.write(" }\n")
  f.write(" }\n")
def packed64(bpv, f):
  """Write the Java source of Packed64BulkOperation<bpv> to f.

  In this layout values are stored contiguously, most significant bits first,
  and may span two consecutive 64-bit blocks. The generated get()/set() are
  fully unrolled over the smallest group of blocks that ends on a block
  boundary (blocks()/values() of the generated class).

  bpv -- number of bits per value, 1 <= bpv <= 64
  f   -- writable text file object receiving the generated Java code
  """
  # Smallest (blocks, values) pair with values * bpv == 64 * blocks.
  # Floor division keeps everything an int under both Python 2 and Python 3.
  blocks = bpv
  values = blocks * 64 // bpv
  while blocks % 2 == 0 and values % 2 == 0:
    blocks //= 2
    values //= 2
  assert values * bpv == 64 * blocks, "%d values, %d blocks, %d bits per value" %(values, blocks, bpv)
  mask = (1 << bpv) - 1
  f.write(" static final class Packed64BulkOperation%d extends BulkOperation {\n\n" %bpv)
  f.write(" public int blocks() {\n")
  f.write(" return %d;\n" %blocks)
  f.write(" }\n\n")
  f.write(" public int values() {\n")
  f.write(" return %d;\n" %values)
  f.write(" }\n\n")

  if bpv == 64:
    # One value per block: plain array copies. In set() the source is `values`
    # (read from vi) and the destination is `blocks` (written at bi).
    f.write(""" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {
System.arraycopy(blocks, bi, values, vi, iterations);
}
public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {
System.arraycopy(values, vi, blocks, bi, iterations);
}
}
""")
    return

  f.write(" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  for i in range(values):
    block_offset = i * bpv // 64
    bit_offset = (i * bpv) % 64
    if bit_offset == 0:
      # start of block
      f.write(" final long block%d = blocks[bi++];\n" %block_offset)
      f.write(" values[vi++] = block%d >>> %d;\n" %(block_offset, 64 - bpv))
    elif bit_offset + bpv == 64:
      # end of block
      f.write(" values[vi++] = block%d & %dL;\n" %(block_offset, mask))
    elif bit_offset + bpv < 64:
      # middle of block
      f.write(" values[vi++] = (block%d >>> %d) & %dL;\n" %(block_offset, 64 - bit_offset - bpv, mask))
    else:
      # value spans across 2 blocks
      mask1 = (1 << (64 - bit_offset)) - 1
      shift1 = bit_offset + bpv - 64
      shift2 = 64 - shift1
      f.write(" final long block%d = blocks[bi++];\n" %(block_offset + 1))
      f.write(" values[vi++] = ((block%d & %dL) << %d) | (block%d >>> %d);\n" %(block_offset, mask1, shift1, block_offset + 1, shift2))
  f.write(" }\n")
  f.write(" }\n\n")

  f.write(" public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  for i in range(values):
    bit_offset = (i * bpv) % 64
    if bit_offset == 0:
      # start of block
      f.write(" blocks[bi++] = (values[vi++] << %d)" %(64 - bpv))
    elif bit_offset + bpv == 64:
      # end of block
      f.write(" | values[vi++];\n")
    elif bit_offset + bpv < 64:
      # inside a block
      f.write(" | (values[vi++] << %d)" %(64 - bit_offset - bpv))
    else:
      # value spans across 2 blocks: its high bits finish the current block,
      # its low bits start the next one
      right_bits = bit_offset + bpv - 64
      f.write(" | (values[vi] >>> %d);\n" %right_bits)
      f.write(" blocks[bi++] = (values[vi++] << %d)" %(64 - right_bits))
  f.write(" }\n")
  f.write(" }\n")
  f.write(" }\n\n")
if __name__ == '__main__':
  # Generates OUTPUT_FILE: the BulkOperation base class, a static initializer
  # registering one operation per (format, bitsPerValue), then the generated
  # nested classes themselves.
  # `with` guarantees the file is closed even if generation fails half-way.
  with open(OUTPUT_FILE, 'w') as f:
    f.write(HEADER)
    f.write(" static {\n")
    # Index 0 of each array stays null: bitsPerValue ranges from 1 to 64.
    f.write(" BULK_OPERATIONS.put(PackedInts.Format.PACKED, new BulkOperation[65]);\n")
    for bpv in range(1, 65):
      f.write(" BULK_OPERATIONS.get(PackedInts.Format.PACKED)[%d] = new Packed64BulkOperation%d();\n" %(bpv, bpv))
    f.write(" BULK_OPERATIONS.put(PackedInts.Format.PACKED_SINGLE_BLOCK, new BulkOperation[65]);\n")
    for bpv in PACKED_64_SINGLE_BLOCK_BPV:
      f.write(" BULK_OPERATIONS.get(PackedInts.Format.PACKED_SINGLE_BLOCK)[%d] = new Packed64SingleBlockBulkOperation%d();\n" %(bpv, bpv))
    f.write(" }\n")
    for bpv in range(1, 65):
      packed64(bpv, f)
    for bpv in PACKED_64_SINGLE_BLOCK_BPV:
      packed64singleblock(bpv,f)
    f.write(FOOTER)

View File

@ -0,0 +1,175 @@
#! /usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Preamble written verbatim at the top of every generated Direct<N>.java file:
# the "generated, do not edit" marker, the package declaration, the ASF license
# header and the imports used by the generated class.
HEADER="""// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
"""
# Bits per value -> Java primitive type of the generated backing array.
TYPES = {8: "byte", 16: "short", 32: "int", 64: "long"}
# Bits per value -> Java expression suffix appended when a stored element is
# read back as a long (empty for 64 bits).
MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
# Bits per value -> Java cast prefix applied when a long is stored into the
# backing array (empty for 64 bits).
CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
if __name__ == '__main__':
  # Generates one Direct<bpv>.java file per entry of TYPES. Widths are visited
  # in sorted order so that runs are deterministic.
  for bpv in sorted(TYPES):
    # How many values of this width make up one 64-bit long; floor division
    # keeps it an int under both Python 2 and Python 3.
    values_per_long = 64 // bpv
    # `with` guarantees the file is closed even if generation fails half-way.
    with open("Direct%d.java" %bpv, 'w') as f:
      f.write(HEADER)
      f.write("""/**
* Direct wrapping of %d-bits values to a backing array.
* @lucene.internal
*/\n""" %bpv)
      f.write("final class Direct%d extends PackedInts.MutableImpl {\n" %bpv)
      f.write(" final %s[] values;\n\n" %TYPES[bpv])
      f.write(" Direct%d(int valueCount) {\n" %bpv)
      f.write(" super(valueCount, %d);\n" %bpv)
      f.write(" values = new %s[valueCount];\n" %TYPES[bpv])
      f.write(" }\n\n")

      # Deserializing constructor: reads valueCount elements, then (for widths
      # below 64) consumes the elements that complete the last 64-bit group.
      f.write(" Direct%d(DataInput in, int valueCount) throws IOException {\n" %bpv)
      f.write(" this(valueCount);\n")
      f.write(" for (int i = 0; i < valueCount; ++i) {\n")
      f.write(" values[i] = in.read%s();\n" %TYPES[bpv].title())
      f.write(" }\n")
      if bpv != 64:
        f.write(" final int mod = valueCount %% %d;\n" %values_per_long)
        f.write(" if (mod != 0) {\n")
        f.write(" for (int i = mod; i < %d; ++i) {\n" %values_per_long)
        f.write(" in.read%s();\n" %TYPES[bpv].title())
        f.write(" }\n")
        f.write(" }\n")
      f.write(" }\n")

      # Single-value accessors and array exposure, shared by every width.
      f.write("""
@Override
public long get(final int index) {
return values[index]%s;
}
public void set(final int index, final long value) {
values[index] = %s(value);
}
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(values);
}
public void clear() {
Arrays.fill(values, %s0L);
}
@Override
public Object getArray() {
return values;
}
@Override
public boolean hasArray() {
return true;
}
""" %(MASKS[bpv], CASTS[bpv], CASTS[bpv]))

      if bpv == 64:
        # The backing array is already a long[]: bulk operations are plain copies.
        f.write("""
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
System.arraycopy(values, index, arr, off, gets);
return gets;
}
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
System.arraycopy(arr, off, values, index, sets);
return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
Arrays.fill(values, fromIndex, toIndex, val);
}
""")
      else:
        # Narrower widths convert element by element, using MASKS/CASTS.
        f.write("""
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
arr[o] = values[i]%s;
}
return gets;
}
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
values[i] = %sarr[o];
}
return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert val == (val%s);
Arrays.fill(values, fromIndex, toIndex, %sval);
}
""" %(MASKS[bpv], CASTS[bpv], MASKS[bpv], CASTS[bpv]))
      f.write("}\n")

View File

@ -58,12 +58,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0; return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
} }
public static float overheadPerValue(int bitsPerValue) {
int valuesPerBlock = 64 / bitsPerValue;
int overhead = 64 %% bitsPerValue;
return (float) overhead / valuesPerBlock;
}
private static int requiredCapacity(int valueCount, int valuesPerBlock) { private static int requiredCapacity(int valueCount, int valuesPerBlock) {
return valueCount / valuesPerBlock return valueCount / valuesPerBlock
+ (valueCount %% valuesPerBlock == 0 ? 0 : 1); + (valueCount %% valuesPerBlock == 0 ? 0 : 1);
@ -111,16 +105,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk get // bulk get
assert index %% valuesPerBlock == 0; assert index %% valuesPerBlock == 0;
final long readMask = (1L << bitsPerValue) - 1; final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
final int startBlock = index / valuesPerBlock; assert op.blocks() == 1;
final int endBlock = (index + len) / valuesPerBlock; assert op.values() == valuesPerBlock;
final int diff = (endBlock - startBlock) * valuesPerBlock; final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.get(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff; index += diff; len -= diff;
for (int block = startBlock; block < endBlock; ++block) {
for (int i = 0; i < valuesPerBlock; ++i) {
arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
}
}
if (index > originalIndex) { if (index > originalIndex) {
// stay at the block boundary // stay at the block boundary
@ -157,17 +149,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk set // bulk set
assert index %% valuesPerBlock == 0; assert index %% valuesPerBlock == 0;
final int startBlock = index / valuesPerBlock; final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
final int endBlock = (index + len) / valuesPerBlock; assert op.blocks() == 1;
final int diff = (endBlock - startBlock) * valuesPerBlock; assert op.values() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.set(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff; index += diff; len -= diff;
for (int block = startBlock; block < endBlock; ++block) {
long next = 0L;
for (int i = 0; i < valuesPerBlock; ++i) {
next |= (arr[off++] << (i * bitsPerValue));
}
blocks[block] = next;
}
if (index > originalIndex) { if (index > originalIndex) {
// stay at the block boundary // stay at the block boundary
@ -221,8 +210,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
} }
@Override @Override
protected int getFormat() { protected PackedInts.Format getFormat() {
return PackedInts.PACKED_SINGLE_BLOCK; return PackedInts.Format.PACKED_SINGLE_BLOCK;
} }
@Override @Override

View File

@ -0,0 +1,161 @@
#! /usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Preamble emitted at the top of every generated Java file: generated-file
# notice, package declaration, license text and the imports the class needs.
HEADER = """// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
"""

# Java primitive type used for one block, keyed by block width in bits.
# Only the widths listed here get a generated class.
TYPES = {8: "byte", 16: "short"}
# Expression suffix appended after a stored block when it is read back
# (masks the value to its low `width` bits as a long).
MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
# Cast prefix placed before a long expression when it is stored into a block.
CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}

# Class-level javadoc; placeholders are (block type name, total bits per value).
_CLASS_JAVADOC = """/**
* Packs integers into 3 %ss (%d bits per value).
* @lucene.internal
*/\n"""

# Accessors, mutators and housekeeping methods of the generated class, plus
# its closing brace. Takes 29 positional arguments; see
# render_three_blocks_class for how they are assembled.
_METHODS_TEMPLATE = """
@Override
public long get(int index) {
final int o = index * 3;
return (blocks[o]%s) << %d | (blocks[o+1]%s) << %d | (blocks[o+2]%s);
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
arr[off++] = (blocks[i]%s) << %d | (blocks[i+1]%s) << %d | (blocks[i+2]%s);
}
return gets;
}
@Override
public void set(int index, long value) {
final int o = index * 3;
blocks[o] = %s(value >>> %d);
blocks[o+1] = %s(value >>> %d);
blocks[o+2] = %svalue;
}
@Override
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int sets = Math.min(valueCount - index, len);
for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
final long value = arr[i];
blocks[o++] = %s(value >>> %d);
blocks[o++] = %s(value >>> %d);
blocks[o++] = %svalue;
}
return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
final %s block1 = %s(val >>> %d);
final %s block2 = %s(val >>> %d);
final %s block3 = %sval;
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
blocks[i] = block1;
blocks[i+1] = block2;
blocks[i+2] = block3;
}
}
@Override
public void clear() {
Arrays.fill(blocks, %s0);
}
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(blocks);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
}
}
"""


def render_three_blocks_class(bpv):
    """Return the full Java source of ``Packed<bpv>ThreeBlocks`` as a string.

    bpv is the width in bits of one block and must be a key of TYPES (and
    therefore of MASKS and CASTS); each value in the generated class spans
    three such blocks, i.e. ``3 * bpv`` bits.

    Raises KeyError when bpv is not a supported width.
    """
    block_type = TYPES[bpv]
    mask = MASKS[bpv]
    cast = CASTS[bpv]
    high_shift = 2 * bpv
    low_shift = bpv
    # Name fragment of the DataInput reader method: "Byte" / "Short".
    reader = block_type.title()
    # Floor division keeps this an int on Python 3 as well as Python 2.
    blocks_per_long = 64 // bpv

    parts = [
        HEADER,
        _CLASS_JAVADOC % (block_type, bpv * 3),
        "final class Packed%dThreeBlocks extends PackedInts.MutableImpl {\n" % bpv,
        " final %s[] blocks;\n\n" % block_type,
        " public static final int MAX_SIZE = Integer.MAX_VALUE / 3;\n\n",
        # Constructor allocating an empty array.
        " Packed%dThreeBlocks(int valueCount) {\n" % bpv,
        " super(valueCount, %d);\n" % (bpv * 3),
        " if (valueCount > MAX_SIZE) {\n",
        " throw new ArrayIndexOutOfBoundsException(\"MAX_SIZE exceeded\");\n",
        " }\n",
        " blocks = new %s[valueCount * 3];\n" % block_type,
        " }\n\n",
        # Constructor reading the blocks from a DataInput; after the payload
        # it consumes extra blocks until the count is a multiple of
        # blocks_per_long (64 bits worth of blocks).
        " Packed%dThreeBlocks(DataInput in, int valueCount) throws IOException {\n" % bpv,
        " this(valueCount);\n",
        " for (int i = 0; i < 3 * valueCount; ++i) {\n",
        " blocks[i] = in.read%s();\n" % reader,
        " }\n",
        " final int mod = blocks.length %% %d;\n" % blocks_per_long,
        " if (mod != 0) {\n",
        " for (int i = mod; i < %d; ++i) {\n" % blocks_per_long,
        " in.read%s();\n" % reader,
        " }\n",
        " }\n",
        " }\n",
    ]

    # Argument groups, in the order the placeholders occur in the template:
    # two read expressions (single get, bulk get), two write sequences
    # (single set, bulk set), the three fill() locals, and the clear() cast.
    read_args = (mask, high_shift, mask, low_shift, mask)
    write_args = (cast, high_shift, cast, low_shift, cast)
    fill_args = (block_type, cast, high_shift,
                 block_type, cast, low_shift,
                 block_type, cast)
    template_args = read_args * 2 + write_args * 2 + fill_args + (cast,)
    parts.append(_METHODS_TEMPLATE % template_args)

    return "".join(parts)


def main():
    """Write ``Packed<bpv>ThreeBlocks.java`` into the current directory for each width in TYPES."""
    for bpv in sorted(TYPES):
        # The context manager closes the file even if rendering or writing fails.
        with open("Packed%dThreeBlocks.java" % bpv, 'w') as out:
            out.write(render_three_blocks_class(bpv))


if __name__ == '__main__':
    main()

View File

@ -19,9 +19,16 @@
<head></head> <head></head>
<body bgcolor="white"> <body bgcolor="white">
<p>Packed integer arrays and streams.</p>
<p> <p>
The packed package provides random access capable arrays of positive longs. The packed package provides
The implementations provides different trade offs between memory usage and <ul>
<li>sequential and random access capable arrays of positive longs,</li>
<li>routines for efficient serialization and deserialization of streams of packed integers.</li>
</ul>
The implementations provide different trade-offs between memory usage and
access speed. The standard usage scenario is replacing large int or long access speed. The standard usage scenario is replacing large int or long
arrays in order to reduce the memory footprint. arrays in order to reduce the memory footprint.
</p><p> </p><p>

View File

@ -24,6 +24,7 @@ import java.util.Random;
import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.*; import org.apache.lucene.store.*;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.Slow;
@ -57,6 +58,9 @@ public class TestPackedInts extends LuceneTestCase {
for(int nbits=1;nbits<=64;nbits++) { for(int nbits=1;nbits<=64;nbits++) {
final long maxValue = PackedInts.maxValue(nbits); final long maxValue = PackedInts.maxValue(nbits);
final int valueCount = 100+random().nextInt(500); final int valueCount = 100+random().nextInt(500);
final int bufferSize = random().nextBoolean()
? _TestUtil.nextInt(random(), 0, 48)
: _TestUtil.nextInt(random(), 0, 4096);
final Directory d = newDirectory(); final Directory d = newDirectory();
IndexOutput out = d.createOutput("out.bin", newIOContext(random())); IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
@ -79,10 +83,10 @@ public class TestPackedInts extends LuceneTestCase {
// ensure that finish() added the (valueCount-actualValueCount) missing values // ensure that finish() added the (valueCount-actualValueCount) missing values
final long bytes; final long bytes;
switch (w.getFormat()) { switch (w.getFormat()) {
case PackedInts.PACKED: case PACKED:
bytes = (long) Math.ceil((double) valueCount * w.bitsPerValue / 64) << 3; bytes = (long) Math.ceil((double) valueCount * w.bitsPerValue / 64) << 3;
break; break;
case PackedInts.PACKED_SINGLE_BLOCK: case PACKED_SINGLE_BLOCK:
final int valuesPerBlock = 64 / w.bitsPerValue; final int valuesPerBlock = 64 / w.bitsPerValue;
bytes = (long) Math.ceil((double) valueCount / valuesPerBlock) << 3; bytes = (long) Math.ceil((double) valueCount / valuesPerBlock) << 3;
break; break;
@ -97,7 +101,7 @@ public class TestPackedInts extends LuceneTestCase {
CodecUtil.checkHeader(in, PackedInts.CODEC_NAME, PackedInts.VERSION_START, PackedInts.VERSION_CURRENT); // codec header CodecUtil.checkHeader(in, PackedInts.CODEC_NAME, PackedInts.VERSION_START, PackedInts.VERSION_CURRENT); // codec header
assertEquals(w.bitsPerValue, in.readVInt()); assertEquals(w.bitsPerValue, in.readVInt());
assertEquals(valueCount, in.readVInt()); assertEquals(valueCount, in.readVInt());
assertEquals(w.getFormat(), in.readVInt()); assertEquals(w.getFormat().getId(), in.readVInt());
assertEquals(startFp, in.getFilePointer()); assertEquals(startFp, in.getFilePointer());
in.close(); in.close();
} }
@ -113,37 +117,34 @@ public class TestPackedInts extends LuceneTestCase {
} }
in.close(); in.close();
} }
{ // test reader iterator next { // test reader iterator next
IndexInput in = d.openInput("out.bin", newIOContext(random())); IndexInput in = d.openInput("out.bin", newIOContext(random()));
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in); PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
for(int i=0;i<valueCount;i++) { for(int i=0;i<valueCount;i++) {
assertEquals("index=" + i + " valueCount=" assertEquals("index=" + i + " valueCount="
+ valueCount + " nbits=" + nbits + " for " + valueCount + " nbits=" + nbits + " for "
+ r.getClass().getSimpleName(), values[i], r.next()); + r.getClass().getSimpleName(), values[i], r.next());
assertEquals(i, r.ord());
} }
assertEquals(fp, in.getFilePointer()); assertEquals(fp, in.getFilePointer());
in.close(); in.close();
} }
{ // test reader iterator next vs. advance
{ // test reader iterator bulk next
IndexInput in = d.openInput("out.bin", newIOContext(random())); IndexInput in = d.openInput("out.bin", newIOContext(random()));
PackedInts.ReaderIterator intsEnum = PackedInts.getReaderIterator(in); PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
for (int i = 0; i < valueCount; i += int i = 0;
1 + ((valueCount - i) <= 20 ? random().nextInt(valueCount - i) while (i < valueCount) {
: random().nextInt(20))) { final int count = _TestUtil.nextInt(random(), 1, 95);
final String msg = "index=" + i + " valueCount=" final LongsRef next = r.next(count);
+ valueCount + " nbits=" + nbits + " for " for (int k = 0; k < next.length; ++k) {
+ intsEnum.getClass().getSimpleName(); assertEquals("index=" + i + " valueCount="
if (i - intsEnum.ord() == 1 && random().nextBoolean()) { + valueCount + " nbits=" + nbits + " for "
assertEquals(msg, values[i], intsEnum.next()); + r.getClass().getSimpleName(), values[i + k], next.longs[next.offset + k]);
} else {
assertEquals(msg, values[i], intsEnum.advance(i));
} }
assertEquals(msg, i, intsEnum.ord()); i += next.length;
} }
if (intsEnum.ord() < valueCount - 1)
assertEquals(values[valueCount - 1], intsEnum
.advance(valueCount - 1));
assertEquals(valueCount - 1, intsEnum.ord());
assertEquals(fp, in.getFilePointer()); assertEquals(fp, in.getFilePointer());
in.close(); in.close();
} }