mirror of https://github.com/apache/lucene.git
LUCENE-4161: Make packed int arrays usable by codecs.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1357159 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9250082566
commit
e96b143b6a
|
@ -315,10 +315,10 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
|
|||
try {
|
||||
// Subsample the index terms
|
||||
clone1.seek(packedIndexStart);
|
||||
final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1);
|
||||
final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||
|
||||
clone2.seek(packedOffsetsStart);
|
||||
final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2);
|
||||
final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||
|
||||
// TODO: often we can get by w/ fewer bits per
|
||||
// value, below... but this'd be more complex:
|
||||
|
|
|
@ -121,7 +121,7 @@ class VarStraightBytesImpl {
|
|||
final IndexInput cloneIdx = reader.cloneIndex();
|
||||
try {
|
||||
numDataBytes = cloneIdx.readVLong();
|
||||
final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx);
|
||||
final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||
for (int i = 0; i < maxDocs; i++) {
|
||||
long offset = iter.next();
|
||||
++lastDocID;
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** Represents long[], as a slice (offset + length) into an
|
||||
* existing long[]. The {@link #longs} member should never be null; use
|
||||
* {@link #EMPTY_LONGS} if necessary.
|
||||
*
|
||||
* @lucene.internal */
|
||||
public final class LongsRef implements Comparable<LongsRef>, Cloneable {
|
||||
|
||||
public static final long[] EMPTY_LONGS = new long[0];
|
||||
|
||||
public long[] longs;
|
||||
public int offset;
|
||||
public int length;
|
||||
|
||||
public LongsRef() {
|
||||
longs = EMPTY_LONGS;
|
||||
}
|
||||
|
||||
public LongsRef(int capacity) {
|
||||
longs = new long[capacity];
|
||||
}
|
||||
|
||||
public LongsRef(long[] longs, int offset, int length) {
|
||||
assert longs != null;
|
||||
assert offset >= 0;
|
||||
assert length >= 0;
|
||||
assert longs.length >= offset + length;
|
||||
this.longs = longs;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongsRef clone() {
|
||||
return new LongsRef(longs, offset, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 0;
|
||||
final long end = offset + length;
|
||||
for(int i = offset; i < end; i++) {
|
||||
result = prime * result + (int) (longs[i] ^ (longs[i]>>>32));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == null) {
|
||||
return false;
|
||||
}
|
||||
if (other instanceof LongsRef) {
|
||||
return this.longsEquals((LongsRef) other);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean longsEquals(LongsRef other) {
|
||||
if (length == other.length) {
|
||||
int otherUpto = other.offset;
|
||||
final long[] otherInts = other.longs;
|
||||
final long end = offset + length;
|
||||
for(int upto=offset; upto<end; upto++,otherUpto++) {
|
||||
if (longs[upto] != otherInts[otherUpto]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Signed long order comparison */
|
||||
public int compareTo(LongsRef other) {
|
||||
if (this == other) return 0;
|
||||
|
||||
final long[] aInts = this.longs;
|
||||
int aUpto = this.offset;
|
||||
final long[] bInts = other.longs;
|
||||
int bUpto = other.offset;
|
||||
|
||||
final long aStop = aUpto + Math.min(this.length, other.length);
|
||||
|
||||
while(aUpto < aStop) {
|
||||
long aInt = aInts[aUpto++];
|
||||
long bInt = bInts[bUpto++];
|
||||
if (aInt > bInt) {
|
||||
return 1;
|
||||
} else if (aInt < bInt) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// One is a prefix of the other, or, they are equal:
|
||||
return this.length - other.length;
|
||||
}
|
||||
|
||||
public void copyLongs(LongsRef other) {
|
||||
if (longs.length - offset < other.length) {
|
||||
longs = new long[other.length];
|
||||
offset = 0;
|
||||
}
|
||||
System.arraycopy(other.longs, other.offset, longs, offset, other.length);
|
||||
length = other.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to grow the reference array.
|
||||
*
|
||||
* In general this should not be used as it does not take the offset into account.
|
||||
* @lucene.internal */
|
||||
public void grow(int newLength) {
|
||||
assert offset == 0;
|
||||
if (longs.length < newLength) {
|
||||
longs = ArrayUtil.grow(longs, newLength);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append('[');
|
||||
final long end = offset + length;
|
||||
for(int i=offset;i<end;i++) {
|
||||
if (i > offset) {
|
||||
sb.append(' ');
|
||||
}
|
||||
sb.append(Long.toHexString(longs[i]));
|
||||
}
|
||||
sb.append(']');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new LongsRef that points to a copy of the longs from
|
||||
* <code>other</code>
|
||||
* <p>
|
||||
* The returned LongsRef will have a length of other.length
|
||||
* and an offset of zero.
|
||||
*/
|
||||
public static LongsRef deepCopyOf(LongsRef other) {
|
||||
LongsRef clone = new LongsRef();
|
||||
clone.copyLongs(other);
|
||||
return clone;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,3 +1,5 @@
|
|||
// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
|
@ -24,62 +26,37 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Direct wrapping of 16 bit values to a backing array of shorts.
|
||||
* Direct wrapping of 16-bit values to a backing array.
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class Direct16 extends PackedInts.MutableImpl {
|
||||
final short[] values;
|
||||
|
||||
class Direct16 extends PackedInts.MutableImpl {
|
||||
private final short[] values;
|
||||
private static final int BITS_PER_VALUE = 16;
|
||||
|
||||
public Direct16(int valueCount) {
|
||||
super(valueCount, BITS_PER_VALUE);
|
||||
Direct16(int valueCount) {
|
||||
super(valueCount, 16);
|
||||
values = new short[valueCount];
|
||||
}
|
||||
|
||||
public Direct16(DataInput in, int valueCount) throws IOException {
|
||||
super(valueCount, BITS_PER_VALUE);
|
||||
short[] values = new short[valueCount];
|
||||
for(int i=0;i<valueCount;i++) {
|
||||
Direct16(DataInput in, int valueCount) throws IOException {
|
||||
this(valueCount);
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
values[i] = in.readShort();
|
||||
}
|
||||
final int mod = valueCount % 4;
|
||||
if (mod != 0) {
|
||||
final int pad = 4-mod;
|
||||
// round out long
|
||||
for(int i=0;i<pad;i++) {
|
||||
for (int i = mod; i < 4; ++i) {
|
||||
in.readShort();
|
||||
}
|
||||
}
|
||||
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an array backed by the given values.
|
||||
* </p><p>
|
||||
* Note: The values are used directly, so changes to the values will
|
||||
* affect the structure.
|
||||
* @param values used as the internal backing array.
|
||||
*/
|
||||
public Direct16(short[] values) {
|
||||
super(values.length, BITS_PER_VALUE);
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
public long get(final int index) {
|
||||
assert index >= 0 && index < size();
|
||||
return 0xFFFFL & values[index];
|
||||
}
|
||||
|
||||
public void set(final int index, final long value) {
|
||||
values[index] = (short)(value & 0xFFFF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
assert (val & 0xffffL) == val;
|
||||
Arrays.fill(values, fromIndex, toIndex, (short) val);
|
||||
public long get(final int index) {
|
||||
return values[index] & 0xFFFFL;
|
||||
}
|
||||
|
||||
public void set(final int index, final long value) {
|
||||
values[index] = (short) (value);
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
|
@ -87,7 +64,7 @@ class Direct16 extends PackedInts.MutableImpl {
|
|||
}
|
||||
|
||||
public void clear() {
|
||||
Arrays.fill(values, (short)0);
|
||||
Arrays.fill(values, (short) 0L);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -99,4 +76,35 @@ class Direct16 extends PackedInts.MutableImpl {
|
|||
public boolean hasArray() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int gets = Math.min(valueCount - index, len);
|
||||
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
|
||||
arr[o] = values[i] & 0xFFFFL;
|
||||
}
|
||||
return gets;
|
||||
}
|
||||
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int sets = Math.min(valueCount - index, len);
|
||||
for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
|
||||
values[i] = (short) arr[o];
|
||||
}
|
||||
return sets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
assert val == (val & 0xFFFFL);
|
||||
Arrays.fill(values, fromIndex, toIndex, (short) val);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
|
@ -24,58 +26,37 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Direct wrapping of 32 bit values to a backing array of ints.
|
||||
* Direct wrapping of 32-bit values to a backing array.
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class Direct32 extends PackedInts.MutableImpl {
|
||||
final int[] values;
|
||||
|
||||
class Direct32 extends PackedInts.MutableImpl {
|
||||
private final int[] values;
|
||||
private static final int BITS_PER_VALUE = 32;
|
||||
|
||||
public Direct32(int valueCount) {
|
||||
super(valueCount, BITS_PER_VALUE);
|
||||
Direct32(int valueCount) {
|
||||
super(valueCount, 32);
|
||||
values = new int[valueCount];
|
||||
}
|
||||
|
||||
public Direct32(DataInput in, int valueCount) throws IOException {
|
||||
super(valueCount, BITS_PER_VALUE);
|
||||
int[] values = new int[valueCount];
|
||||
for(int i=0;i<valueCount;i++) {
|
||||
Direct32(DataInput in, int valueCount) throws IOException {
|
||||
this(valueCount);
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
values[i] = in.readInt();
|
||||
}
|
||||
final int mod = valueCount % 2;
|
||||
if (mod != 0) {
|
||||
for (int i = mod; i < 2; ++i) {
|
||||
in.readInt();
|
||||
}
|
||||
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an array backed by the given values.
|
||||
* </p><p>
|
||||
* Note: The values are used directly, so changes to the given values will
|
||||
* affect the structure.
|
||||
* @param values used as the internal backing array.
|
||||
*/
|
||||
public Direct32(int[] values) {
|
||||
super(values.length, BITS_PER_VALUE);
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
public long get(final int index) {
|
||||
assert index >= 0 && index < size();
|
||||
return 0xFFFFFFFFL & values[index];
|
||||
}
|
||||
|
||||
public void set(final int index, final long value) {
|
||||
values[index] = (int)(value & 0xFFFFFFFF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
assert (val & 0xffffffffL) == val;
|
||||
Arrays.fill(values, fromIndex, toIndex, (int) val);
|
||||
public long get(final int index) {
|
||||
return values[index] & 0xFFFFFFFFL;
|
||||
}
|
||||
|
||||
public void set(final int index, final long value) {
|
||||
values[index] = (int) (value);
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
|
@ -83,11 +64,11 @@ class Direct32 extends PackedInts.MutableImpl {
|
|||
}
|
||||
|
||||
public void clear() {
|
||||
Arrays.fill(values, 0);
|
||||
Arrays.fill(values, (int) 0L);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] getArray() {
|
||||
public Object getArray() {
|
||||
return values;
|
||||
}
|
||||
|
||||
|
@ -95,4 +76,35 @@ class Direct32 extends PackedInts.MutableImpl {
|
|||
public boolean hasArray() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int gets = Math.min(valueCount - index, len);
|
||||
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
|
||||
arr[o] = values[i] & 0xFFFFFFFFL;
|
||||
}
|
||||
return gets;
|
||||
}
|
||||
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int sets = Math.min(valueCount - index, len);
|
||||
for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
|
||||
values[i] = (int) arr[o];
|
||||
}
|
||||
return sets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
assert val == (val & 0xFFFFFFFFL);
|
||||
Arrays.fill(values, fromIndex, toIndex, (int) val);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
|
@ -24,70 +26,31 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Direct wrapping of 64 bit values to a backing array of longs.
|
||||
* Direct wrapping of 64-bit values to a backing array.
|
||||
* @lucene.internal
|
||||
*/
|
||||
class Direct64 extends PackedInts.MutableImpl {
|
||||
private final long[] values;
|
||||
private static final int BITS_PER_VALUE = 64;
|
||||
final class Direct64 extends PackedInts.MutableImpl {
|
||||
final long[] values;
|
||||
|
||||
public Direct64(int valueCount) {
|
||||
super(valueCount, BITS_PER_VALUE);
|
||||
Direct64(int valueCount) {
|
||||
super(valueCount, 64);
|
||||
values = new long[valueCount];
|
||||
}
|
||||
|
||||
public Direct64(DataInput in, int valueCount) throws IOException {
|
||||
super(valueCount, BITS_PER_VALUE);
|
||||
long[] values = new long[valueCount];
|
||||
for(int i=0;i<valueCount;i++) {
|
||||
Direct64(DataInput in, int valueCount) throws IOException {
|
||||
this(valueCount);
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
values[i] = in.readLong();
|
||||
}
|
||||
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an array backed by the given values.
|
||||
* </p><p>
|
||||
* Note: The values are used directly, so changes to the given values will
|
||||
* affect the structure.
|
||||
* @param values used as the internal backing array.
|
||||
*/
|
||||
public Direct64(long[] values) {
|
||||
super(values.length, BITS_PER_VALUE);
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(final int index) {
|
||||
assert index >= 0 && index < size();
|
||||
return values[index];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
final int gets = Math.min(valueCount - index, len);
|
||||
System.arraycopy(values, index, arr, off, gets);
|
||||
return gets;
|
||||
}
|
||||
|
||||
public void set(final int index, final long value) {
|
||||
values[index] = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
final int sets = Math.min(valueCount - index, len);
|
||||
System.arraycopy(arr, off, values, index, sets);
|
||||
return sets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
Arrays.fill(values, fromIndex, toIndex, val);
|
||||
values[index] = (value);
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
|
@ -99,7 +62,7 @@ class Direct64 extends PackedInts.MutableImpl {
|
|||
}
|
||||
|
||||
@Override
|
||||
public long[] getArray() {
|
||||
public Object getArray() {
|
||||
return values;
|
||||
}
|
||||
|
||||
|
@ -108,4 +71,29 @@ class Direct64 extends PackedInts.MutableImpl {
|
|||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int gets = Math.min(valueCount - index, len);
|
||||
System.arraycopy(values, index, arr, off, gets);
|
||||
return gets;
|
||||
}
|
||||
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int sets = Math.min(valueCount - index, len);
|
||||
System.arraycopy(arr, off, values, index, sets);
|
||||
return sets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
Arrays.fill(values, fromIndex, toIndex, val);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
|
@ -24,63 +26,37 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Direct wrapping of 8 bit values to a backing array of bytes.
|
||||
* Direct wrapping of 8-bit values to a backing array.
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class Direct8 extends PackedInts.MutableImpl {
|
||||
final byte[] values;
|
||||
|
||||
class Direct8 extends PackedInts.MutableImpl {
|
||||
private final byte[] values;
|
||||
private static final int BITS_PER_VALUE = 8;
|
||||
|
||||
public Direct8(int valueCount) {
|
||||
super(valueCount, BITS_PER_VALUE);
|
||||
Direct8(int valueCount) {
|
||||
super(valueCount, 8);
|
||||
values = new byte[valueCount];
|
||||
}
|
||||
|
||||
public Direct8(DataInput in, int valueCount)
|
||||
throws IOException {
|
||||
super(valueCount, BITS_PER_VALUE);
|
||||
byte[] values = new byte[valueCount];
|
||||
for(int i=0;i<valueCount;i++) {
|
||||
Direct8(DataInput in, int valueCount) throws IOException {
|
||||
this(valueCount);
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
values[i] = in.readByte();
|
||||
}
|
||||
final int mod = valueCount % 8;
|
||||
if (mod != 0) {
|
||||
final int pad = 8-mod;
|
||||
// round out long
|
||||
for(int i=0;i<pad;i++) {
|
||||
for (int i = mod; i < 8; ++i) {
|
||||
in.readByte();
|
||||
}
|
||||
}
|
||||
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an array backed by the given values.
|
||||
* </p><p>
|
||||
* Note: The values are used directly, so changes to the given values will
|
||||
* affect the structure.
|
||||
* @param values used as the internal backing array.
|
||||
*/
|
||||
public Direct8(byte[] values) {
|
||||
super(values.length, BITS_PER_VALUE);
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
public long get(final int index) {
|
||||
assert index >= 0 && index < size();
|
||||
return 0xFFL & values[index];
|
||||
}
|
||||
|
||||
public void set(final int index, final long value) {
|
||||
values[index] = (byte)(value & 0xFF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
assert (val & 0xffL) == val;
|
||||
Arrays.fill(values, fromIndex, toIndex, (byte) val);
|
||||
public long get(final int index) {
|
||||
return values[index] & 0xFFL;
|
||||
}
|
||||
|
||||
public void set(final int index, final long value) {
|
||||
values[index] = (byte) (value);
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
|
@ -88,7 +64,7 @@ class Direct8 extends PackedInts.MutableImpl {
|
|||
}
|
||||
|
||||
public void clear() {
|
||||
Arrays.fill(values, (byte)0);
|
||||
Arrays.fill(values, (byte) 0L);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -100,4 +76,35 @@ class Direct8 extends PackedInts.MutableImpl {
|
|||
public boolean hasArray() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int gets = Math.min(valueCount - index, len);
|
||||
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
|
||||
arr[o] = values[i] & 0xFFL;
|
||||
}
|
||||
return gets;
|
||||
}
|
||||
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int sets = Math.min(valueCount - index, len);
|
||||
for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
|
||||
values[i] = (byte) arr[o];
|
||||
}
|
||||
return sets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
assert val == (val & 0xFFL);
|
||||
Arrays.fill(values, fromIndex, toIndex, (byte) val);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -23,31 +19,37 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** 48 bitsPerValue backed by short[] */
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Packs integers into 3 shorts (48 bits per value).
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
|
||||
final short[] blocks;
|
||||
|
||||
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
||||
|
||||
private final short[] blocks;
|
||||
|
||||
Packed16ThreeBlocks(int valueCount) {
|
||||
super(valueCount, 48);
|
||||
if (valueCount > MAX_SIZE) {
|
||||
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
|
||||
}
|
||||
this.blocks = new short[3 * valueCount];
|
||||
blocks = new short[valueCount * 3];
|
||||
}
|
||||
|
||||
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
|
||||
this(valueCount);
|
||||
for (int i = 0; i < blocks.length; i++) {
|
||||
for (int i = 0; i < 3 * valueCount; ++i) {
|
||||
blocks[i] = in.readShort();
|
||||
}
|
||||
final int mod = blocks.length % 4;
|
||||
if (mod != 0) {
|
||||
final int pad = 4 - mod;
|
||||
// round out long
|
||||
for (int i = 0; i < pad; i++) {
|
||||
for (int i = mod; i < 4; ++i) {
|
||||
in.readShort();
|
||||
}
|
||||
}
|
||||
|
@ -56,26 +58,55 @@ final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
|
|||
@Override
|
||||
public long get(int index) {
|
||||
final int o = index * 3;
|
||||
return (blocks[o] & 0xffffL) << 32 | (blocks[o+1] & 0xffffL) << 16 | (blocks[o+2] & 0xffffL);
|
||||
return (blocks[o] & 0xFFFFL) << 32 | (blocks[o+1] & 0xFFFFL) << 16 | (blocks[o+2] & 0xFFFFL);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int gets = Math.min(valueCount - index, len);
|
||||
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
|
||||
arr[off++] = (blocks[i] & 0xFFFFL) << 32 | (blocks[i+1] & 0xFFFFL) << 16 | (blocks[i+2] & 0xFFFFL);
|
||||
}
|
||||
return gets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(int index, long value) {
|
||||
final int o = index * 3;
|
||||
blocks[o] = (short) (value >> 32);
|
||||
blocks[o+1] = (short) (value >> 16);
|
||||
blocks[o] = (short) (value >>> 32);
|
||||
blocks[o+1] = (short) (value >>> 16);
|
||||
blocks[o+2] = (short) value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int sets = Math.min(valueCount - index, len);
|
||||
for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
|
||||
final long value = arr[i];
|
||||
blocks[o++] = (short) (value >>> 32);
|
||||
blocks[o++] = (short) (value >>> 16);
|
||||
blocks[o++] = (short) value;
|
||||
}
|
||||
return sets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
short block1 = (short) (val >> 32);
|
||||
short block2 = (short) (val >> 16);
|
||||
short block3 = (short) val;
|
||||
for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) {
|
||||
blocks[i++] = block1;
|
||||
blocks[i++] = block2;
|
||||
blocks[i++] = block3;
|
||||
final short block1 = (short) (val >>> 32);
|
||||
final short block2 = (short) (val >>> 16);
|
||||
final short block3 = (short) val;
|
||||
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
|
||||
blocks[i] = block1;
|
||||
blocks[i+1] = block2;
|
||||
blocks[i+2] = block3;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -138,6 +138,49 @@ class Packed64 extends PackedInts.MutableImpl {
|
|||
& maskRight;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
len = Math.min(len, valueCount - index);
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int originalIndex = index;
|
||||
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
|
||||
|
||||
// go to the next block where the value does not span across two blocks
|
||||
final int offsetInBlocks = index % op.values();
|
||||
if (offsetInBlocks != 0) {
|
||||
for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
|
||||
arr[off++] = get(index++);
|
||||
--len;
|
||||
}
|
||||
if (len == 0) {
|
||||
return index - originalIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// bulk get
|
||||
assert index % op.values() == 0;
|
||||
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
|
||||
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
|
||||
final int iterations = len / op.values();
|
||||
op.get(blocks, blockIndex, arr, off, iterations);
|
||||
final int gotValues = iterations * op.values();
|
||||
index += gotValues;
|
||||
len -= gotValues;
|
||||
assert len >= 0;
|
||||
|
||||
if (index > originalIndex) {
|
||||
// stay at the block boundary
|
||||
return index - originalIndex;
|
||||
} else {
|
||||
// no progress so far => already at a block boundary but no full block to get
|
||||
assert index == originalIndex;
|
||||
return super.get(index, arr, off, len);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(final int index, final long value) {
|
||||
// The abstract index in a contiguous bit stream
|
||||
|
@ -159,6 +202,48 @@ class Packed64 extends PackedInts.MutableImpl {
|
|||
| (value << (BLOCK_SIZE - endBits));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
len = Math.min(len, valueCount - index);
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int originalIndex = index;
|
||||
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
|
||||
|
||||
// go to the next block where the value does not span across two blocks
|
||||
final int offsetInBlocks = index % op.values();
|
||||
if (offsetInBlocks != 0) {
|
||||
for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
|
||||
set(index++, arr[off++]);
|
||||
--len;
|
||||
}
|
||||
if (len == 0) {
|
||||
return index - originalIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// bulk set
|
||||
assert index % op.values() == 0;
|
||||
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
|
||||
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
|
||||
final int iterations = len / op.values();
|
||||
op.set(blocks, blockIndex, arr, off, iterations);
|
||||
final int setValues = iterations * op.values();
|
||||
index += setValues;
|
||||
len -= setValues;
|
||||
assert len >= 0;
|
||||
|
||||
if (index > originalIndex) {
|
||||
// stay at the block boundary
|
||||
return index - originalIndex;
|
||||
} else {
|
||||
// no progress so far => already at a block boundary but no full block to set
|
||||
assert index == originalIndex;
|
||||
return super.set(index, arr, off, len);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -39,12 +39,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
|||
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
|
||||
}
|
||||
|
||||
public static float overheadPerValue(int bitsPerValue) {
|
||||
int valuesPerBlock = 64 / bitsPerValue;
|
||||
int overhead = 64 % bitsPerValue;
|
||||
return (float) overhead / valuesPerBlock;
|
||||
}
|
||||
|
||||
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
|
||||
return valueCount / valuesPerBlock
|
||||
+ (valueCount % valuesPerBlock == 0 ? 0 : 1);
|
||||
|
@ -92,16 +86,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
|||
|
||||
// bulk get
|
||||
assert index % valuesPerBlock == 0;
|
||||
final long readMask = (1L << bitsPerValue) - 1;
|
||||
final int startBlock = index / valuesPerBlock;
|
||||
final int endBlock = (index + len) / valuesPerBlock;
|
||||
final int diff = (endBlock - startBlock) * valuesPerBlock;
|
||||
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||
assert op.blocks() == 1;
|
||||
assert op.values() == valuesPerBlock;
|
||||
final int blockIndex = index / valuesPerBlock;
|
||||
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||
op.get(blocks, blockIndex, arr, off, nblocks);
|
||||
final int diff = nblocks * valuesPerBlock;
|
||||
index += diff; len -= diff;
|
||||
for (int block = startBlock; block < endBlock; ++block) {
|
||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
||||
arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
|
||||
}
|
||||
}
|
||||
|
||||
if (index > originalIndex) {
|
||||
// stay at the block boundary
|
||||
|
@ -138,17 +130,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
|||
|
||||
// bulk set
|
||||
assert index % valuesPerBlock == 0;
|
||||
final int startBlock = index / valuesPerBlock;
|
||||
final int endBlock = (index + len) / valuesPerBlock;
|
||||
final int diff = (endBlock - startBlock) * valuesPerBlock;
|
||||
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||
assert op.blocks() == 1;
|
||||
assert op.values() == valuesPerBlock;
|
||||
final int blockIndex = index / valuesPerBlock;
|
||||
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||
op.set(blocks, blockIndex, arr, off, nblocks);
|
||||
final int diff = nblocks * valuesPerBlock;
|
||||
index += diff; len -= diff;
|
||||
for (int block = startBlock; block < endBlock; ++block) {
|
||||
long next = 0L;
|
||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
||||
next |= (arr[off++] << (i * bitsPerValue));
|
||||
}
|
||||
blocks[block] = next;
|
||||
}
|
||||
|
||||
if (index > originalIndex) {
|
||||
// stay at the block boundary
|
||||
|
@ -202,8 +191,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected int getFormat() {
|
||||
return PackedInts.PACKED_SINGLE_BLOCK;
|
||||
protected PackedInts.Format getFormat() {
|
||||
return PackedInts.Format.PACKED_SINGLE_BLOCK;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,87 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.packed.PackedInts.ReaderIteratorImpl;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
final class Packed64SingleBlockReaderIterator extends ReaderIteratorImpl {
|
||||
|
||||
private long pending;
|
||||
private int shift;
|
||||
private final long mask;
|
||||
private int position;
|
||||
|
||||
Packed64SingleBlockReaderIterator(int valueCount, int bitsPerValue, IndexInput in) {
|
||||
super(valueCount, bitsPerValue, in);
|
||||
pending = 0;
|
||||
shift = 64;
|
||||
mask = ~(~0L << bitsPerValue);
|
||||
position = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long next() throws IOException {
|
||||
if (shift + bitsPerValue > 64) {
|
||||
pending = in.readLong();
|
||||
shift = 0;
|
||||
}
|
||||
final long next = (pending >>> shift) & mask;
|
||||
shift += bitsPerValue;
|
||||
++position;
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ord() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long advance(int ord) throws IOException {
|
||||
assert ord < valueCount : "ord must be less than valueCount";
|
||||
assert ord > position : "ord must be greater than the current position";
|
||||
|
||||
final int valuesPerBlock = 64 / bitsPerValue;
|
||||
final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
|
||||
final long targetBlock = ord / valuesPerBlock;
|
||||
final long blocksToSkip = targetBlock - nextBlock;
|
||||
if (blocksToSkip > 0) {
|
||||
final long skip = blocksToSkip << 3;
|
||||
final long filePointer = in.getFilePointer();
|
||||
|
||||
in.seek(filePointer + skip);
|
||||
shift = 64;
|
||||
|
||||
final int offsetInBlock = ord % valuesPerBlock;
|
||||
for (int i = 0; i < offsetInBlock; ++i) {
|
||||
next();
|
||||
}
|
||||
} else {
|
||||
for (int i = position; i < ord - 1; ++i) {
|
||||
next();
|
||||
}
|
||||
}
|
||||
|
||||
position = ord - 1;
|
||||
return next();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,81 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.packed.PackedInts.Writer;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link Writer} for {@link Packed64SingleBlock} readers.
|
||||
*/
|
||||
final class Packed64SingleBlockWriter extends Writer {
|
||||
|
||||
private long pending;
|
||||
private int shift;
|
||||
private int written;
|
||||
|
||||
Packed64SingleBlockWriter(DataOutput out, int valueCount,
|
||||
int bitsPerValue) throws IOException {
|
||||
super(out, valueCount, bitsPerValue);
|
||||
assert Packed64SingleBlock.isSupported(bitsPerValue) : bitsPerValue + " is not supported";
|
||||
pending = 0;
|
||||
shift = 0;
|
||||
written = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getFormat() {
|
||||
return PackedInts.PACKED_SINGLE_BLOCK;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(long v) throws IOException {
|
||||
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
|
||||
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
|
||||
assert v >= 0;
|
||||
|
||||
if (shift + bitsPerValue > Long.SIZE) {
|
||||
out.writeLong(pending);
|
||||
pending = 0;
|
||||
shift = 0;
|
||||
}
|
||||
pending |= v << shift;
|
||||
shift += bitsPerValue;
|
||||
++written;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() throws IOException {
|
||||
while (written < valueCount) {
|
||||
add(0L); // Auto flush
|
||||
}
|
||||
|
||||
if (shift > 0) {
|
||||
// add was called at least once
|
||||
out.writeLong(pending);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Packed64SingleBlockWriter(written " + written + "/" + valueCount + " with "
|
||||
+ bitsPerValue + " bits/value)";
|
||||
}
|
||||
}
|
|
@ -1,11 +1,7 @@
|
|||
// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -23,31 +19,37 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** 24 bitsPerValue backed by byte[] */
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Packs integers into 3 bytes (24 bits per value).
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
|
||||
final byte[] blocks;
|
||||
|
||||
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
||||
|
||||
private final byte[] blocks;
|
||||
|
||||
Packed8ThreeBlocks(int valueCount) {
|
||||
super(valueCount, 24);
|
||||
if (valueCount > MAX_SIZE) {
|
||||
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
|
||||
}
|
||||
this.blocks = new byte[3 * valueCount];
|
||||
blocks = new byte[valueCount * 3];
|
||||
}
|
||||
|
||||
Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
|
||||
this(valueCount);
|
||||
for (int i = 0; i < blocks.length; i++) {
|
||||
for (int i = 0; i < 3 * valueCount; ++i) {
|
||||
blocks[i] = in.readByte();
|
||||
}
|
||||
final int mod = blocks.length % 8;
|
||||
if (mod != 0) {
|
||||
final int pad = 8 - mod;
|
||||
// round out long
|
||||
for (int i = 0; i < pad; i++) {
|
||||
for (int i = mod; i < 8; ++i) {
|
||||
in.readByte();
|
||||
}
|
||||
}
|
||||
|
@ -56,26 +58,55 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
|
|||
@Override
|
||||
public long get(int index) {
|
||||
final int o = index * 3;
|
||||
return (blocks[o] & 0xffL) << 16 | (blocks[o+1] & 0xffL) << 8 | (blocks[o+2] & 0xffL);
|
||||
return (blocks[o] & 0xFFL) << 16 | (blocks[o+1] & 0xFFL) << 8 | (blocks[o+2] & 0xFFL);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int gets = Math.min(valueCount - index, len);
|
||||
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
|
||||
arr[off++] = (blocks[i] & 0xFFL) << 16 | (blocks[i+1] & 0xFFL) << 8 | (blocks[i+2] & 0xFFL);
|
||||
}
|
||||
return gets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(int index, long value) {
|
||||
final int o = index * 3;
|
||||
blocks[o] = (byte) (value >>> 16);
|
||||
blocks[o+1] = (byte) (value >>> 8);
|
||||
blocks[o+2] = (byte) value;
|
||||
blocks[o+1] = (byte) (value >> 8);
|
||||
blocks[o] = (byte) (value >> 16);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int sets = Math.min(valueCount - index, len);
|
||||
for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
|
||||
final long value = arr[i];
|
||||
blocks[o++] = (byte) (value >>> 16);
|
||||
blocks[o++] = (byte) (value >>> 8);
|
||||
blocks[o++] = (byte) value;
|
||||
}
|
||||
return sets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
byte block1 = (byte) (val >> 16);
|
||||
byte block2 = (byte) (val >> 8);
|
||||
byte block3 = (byte) val;
|
||||
for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) {
|
||||
blocks[i++] = block1;
|
||||
blocks[i++] = block2;
|
||||
blocks[i++] = block3;
|
||||
final byte block1 = (byte) (val >>> 16);
|
||||
final byte block2 = (byte) (val >>> 8);
|
||||
final byte block3 = (byte) val;
|
||||
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
|
||||
blocks[i] = block1;
|
||||
blocks[i+1] = block2;
|
||||
blocks[i+2] = block3;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,5 +124,4 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
|
|||
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
|
|||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
@ -34,7 +35,6 @@ import java.io.IOException;
|
|||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
|
||||
public class PackedInts {
|
||||
|
||||
/**
|
||||
|
@ -62,12 +62,184 @@ public class PackedInts {
|
|||
*/
|
||||
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
|
||||
|
||||
final static String CODEC_NAME = "PackedInts";
|
||||
final static int VERSION_START = 0;
|
||||
final static int VERSION_CURRENT = VERSION_START;
|
||||
public final static String CODEC_NAME = "PackedInts";
|
||||
public final static int VERSION_START = 0;
|
||||
public final static int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static final int PACKED = 0;
|
||||
static final int PACKED_SINGLE_BLOCK = 1;
|
||||
/**
|
||||
* A format to write packed ints.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public enum Format {
|
||||
/**
|
||||
* Compact format, all bits are written contiguously.
|
||||
*/
|
||||
PACKED(0) {
|
||||
|
||||
@Override
|
||||
public int nblocks(int bitsPerValue, int values) {
|
||||
return (int) Math.ceil((double) values * bitsPerValue / 64);
|
||||
}
|
||||
|
||||
},
|
||||
|
||||
/**
|
||||
* A format that may insert padding bits to improve encoding and decoding
|
||||
* speed. Since this format doesn't support all possible bits per value, you
|
||||
* should never use it directly, but rather use
|
||||
* {@link PackedInts#fastestFormatAndBits(int, int, float)} to find the
|
||||
* format that best suits your needs.
|
||||
*/
|
||||
PACKED_SINGLE_BLOCK(1) {
|
||||
|
||||
@Override
|
||||
public int nblocks(int bitsPerValue, int values) {
|
||||
final int valuesPerBlock = 64 / bitsPerValue;
|
||||
return (int) Math.ceil((double) values / valuesPerBlock);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSupported(int bitsPerValue) {
|
||||
return Packed64SingleBlock.isSupported(bitsPerValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float overheadPerValue(int bitsPerValue) {
|
||||
assert isSupported(bitsPerValue);
|
||||
final int valuesPerBlock = 64 / bitsPerValue;
|
||||
final int overhead = 64 % bitsPerValue;
|
||||
return (float) overhead / valuesPerBlock;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* Get a format according to its ID.
|
||||
*/
|
||||
public static Format byId(int id) {
|
||||
for (Format format : Format.values()) {
|
||||
if (format.getId() == id) {
|
||||
return format;
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown format id: " + id);
|
||||
}
|
||||
|
||||
private Format(int id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public int id;
|
||||
|
||||
/**
|
||||
* Returns the ID of the format.
|
||||
*/
|
||||
public int getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes how many blocks are needed to store <code>values</code> values
|
||||
* of size <code>bitsPerValue</code>.
|
||||
*/
|
||||
public abstract int nblocks(int bitsPerValue, int values);
|
||||
|
||||
/**
|
||||
* Tests whether the provided number of bits per value is supported by the
|
||||
* format.
|
||||
*/
|
||||
public boolean isSupported(int bitsPerValue) {
|
||||
return bitsPerValue >= 1 && bitsPerValue <= 64;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the overhead per value, in bits.
|
||||
*/
|
||||
public float overheadPerValue(int bitsPerValue) {
|
||||
assert isSupported(bitsPerValue);
|
||||
return 0f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the overhead ratio (<code>overhead per value / bits per value</code>).
|
||||
*/
|
||||
public final float overheadRatio(int bitsPerValue) {
|
||||
assert isSupported(bitsPerValue);
|
||||
return overheadPerValue(bitsPerValue) / bitsPerValue;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple class that holds a format and a number of bits per value.
|
||||
*/
|
||||
public static class FormatAndBits {
|
||||
public final Format format;
|
||||
public final int bitsPerValue;
|
||||
public FormatAndBits(Format format, int bitsPerValue) {
|
||||
this.format = format;
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to find the {@link Format} and number of bits per value that would
|
||||
* restore from disk the fastest reader whose overhead is less than
|
||||
* <code>acceptableOverheadRatio</code>.
|
||||
* </p><p>
|
||||
* The <code>acceptableOverheadRatio</code> parameter makes sense for
|
||||
* random-access {@link Reader}s. In case you only plan to perform
|
||||
* sequential access on this stream later on, you should probably use
|
||||
* {@link PackedInts#COMPACT}.
|
||||
* </p><p>
|
||||
* If you don't know how many values you are going to write, use
|
||||
* <code>valueCount = -1</code>.
|
||||
*/
|
||||
public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
|
||||
if (valueCount == -1) {
|
||||
valueCount = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||
|
||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||
|
||||
int actualBitsPerValue = -1;
|
||||
Format format = Format.PACKED;
|
||||
|
||||
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||
actualBitsPerValue = 8;
|
||||
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||
actualBitsPerValue = 16;
|
||||
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||
actualBitsPerValue = 32;
|
||||
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||
actualBitsPerValue = 64;
|
||||
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||
actualBitsPerValue = 24;
|
||||
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||
actualBitsPerValue = 48;
|
||||
} else {
|
||||
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||
if (Format.PACKED_SINGLE_BLOCK.isSupported(bpv)) {
|
||||
float overhead = Format.PACKED_SINGLE_BLOCK.overheadPerValue(bpv);
|
||||
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||
if (overhead <= acceptableOverhead) {
|
||||
actualBitsPerValue = bpv;
|
||||
format = Format.PACKED_SINGLE_BLOCK;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (actualBitsPerValue < 0) {
|
||||
actualBitsPerValue = bitsPerValue;
|
||||
}
|
||||
}
|
||||
|
||||
return new FormatAndBits(format, actualBitsPerValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* A read-only random access array of positive integers.
|
||||
|
@ -132,30 +304,39 @@ public class PackedInts {
|
|||
public static interface ReaderIterator extends Closeable {
|
||||
/** Returns next value */
|
||||
long next() throws IOException;
|
||||
/** Returns at least 1 and at most <code>count</code> next values,
|
||||
* the returned ref MUST NOT be modified */
|
||||
LongsRef next(int count) throws IOException;
|
||||
/** Returns number of bits per value */
|
||||
int getBitsPerValue();
|
||||
/** Returns number of values */
|
||||
int size();
|
||||
/** Returns the current position */
|
||||
int ord();
|
||||
/** Skips to the given ordinal and returns its value.
|
||||
* @return the value at the given position
|
||||
* @throws IOException if reading the value throws an IOException*/
|
||||
long advance(int ord) throws IOException;
|
||||
}
|
||||
|
||||
static abstract class ReaderIteratorImpl implements ReaderIterator {
|
||||
|
||||
protected final IndexInput in;
|
||||
protected final DataInput in;
|
||||
protected final int bitsPerValue;
|
||||
protected final int valueCount;
|
||||
|
||||
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) {
|
||||
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, DataInput in) {
|
||||
this.in = in;
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
this.valueCount = valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long next() throws IOException {
|
||||
LongsRef nextValues = next(1);
|
||||
assert nextValues.length > 0;
|
||||
final long result = nextValues.longs[nextValues.offset];
|
||||
++nextValues.offset;
|
||||
--nextValues.length;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBitsPerValue() {
|
||||
return bitsPerValue;
|
||||
|
@ -168,7 +349,9 @@ public class PackedInts {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
in.close();
|
||||
if (in instanceof Closeable) {
|
||||
((Closeable) in).close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -217,7 +400,7 @@ public class PackedInts {
|
|||
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static abstract class ReaderImpl implements Reader {
|
||||
static abstract class ReaderImpl implements Reader {
|
||||
protected final int bitsPerValue;
|
||||
protected final int valueCount;
|
||||
|
||||
|
@ -257,7 +440,7 @@ public class PackedInts {
|
|||
|
||||
}
|
||||
|
||||
public static abstract class MutableImpl extends ReaderImpl implements Mutable {
|
||||
static abstract class MutableImpl extends ReaderImpl implements Mutable {
|
||||
|
||||
protected MutableImpl(int valueCount, int bitsPerValue) {
|
||||
super(valueCount, bitsPerValue);
|
||||
|
@ -283,13 +466,15 @@ public class PackedInts {
|
|||
}
|
||||
}
|
||||
|
||||
protected int getFormat() {
|
||||
return PACKED;
|
||||
protected Format getFormat() {
|
||||
return Format.PACKED;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(DataOutput out) throws IOException {
|
||||
Writer writer = getWriterByFormat(out, valueCount, bitsPerValue, getFormat());
|
||||
Writer writer = getWriterNoHeader(out, getFormat(),
|
||||
valueCount, bitsPerValue, DEFAULT_BUFFER_SIZE);
|
||||
writer.writeHeader();
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
writer.add(get(i));
|
||||
}
|
||||
|
@ -302,121 +487,209 @@ public class PackedInts {
|
|||
*/
|
||||
public static abstract class Writer {
|
||||
protected final DataOutput out;
|
||||
protected final int bitsPerValue;
|
||||
protected final int valueCount;
|
||||
protected final int bitsPerValue;
|
||||
|
||||
protected Writer(DataOutput out, int valueCount, int bitsPerValue)
|
||||
throws IOException {
|
||||
assert bitsPerValue <= 64;
|
||||
|
||||
assert valueCount >= 0 || valueCount == -1;
|
||||
this.out = out;
|
||||
this.valueCount = valueCount;
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
}
|
||||
|
||||
void writeHeader() throws IOException {
|
||||
assert valueCount != -1;
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
out.writeVInt(bitsPerValue);
|
||||
out.writeVInt(valueCount);
|
||||
out.writeVInt(getFormat());
|
||||
out.writeVInt(getFormat().getId());
|
||||
}
|
||||
|
||||
protected abstract int getFormat();
|
||||
/** The format used to serialize values. */
|
||||
protected abstract PackedInts.Format getFormat();
|
||||
|
||||
/** Add a value to the stream. */
|
||||
public abstract void add(long v) throws IOException;
|
||||
|
||||
/** The number of bits per value. */
|
||||
public final int bitsPerValue() {
|
||||
return bitsPerValue;
|
||||
}
|
||||
|
||||
/** Perform end-of-stream operations. */
|
||||
public abstract void finish() throws IOException;
|
||||
|
||||
/**
|
||||
* Returns the current ord in the stream (number of values that have been
|
||||
* written so far minus one).
|
||||
*/
|
||||
public abstract int ord();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve PackedInt data from the DataInput and return a packed int
|
||||
* structure based on it.
|
||||
* Expert: Restore a {@link Reader} from a stream without reading metadata at
|
||||
* the beginning of the stream. This method is useful to restore data from
|
||||
* streams which have been created using
|
||||
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||
*
|
||||
* @param in positioned at the beginning of a stored packed int structure.
|
||||
* @return a read only random access capable array of positive integers.
|
||||
* @throws IOException if the structure could not be retrieved.
|
||||
* @param in the stream to read data from, positioned at the beginning of the packed values
|
||||
* @param format the format used to serialize
|
||||
* @param version the version used to serialize the data
|
||||
* @param valueCount how many values the stream holds
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @return a Reader
|
||||
* @throws IOException
|
||||
* @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Reader getReader(DataInput in) throws IOException {
|
||||
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
final int format = in.readVInt();
|
||||
|
||||
public static Reader getReaderNoHeader(DataInput in, Format format, int version,
|
||||
int valueCount, int bitsPerValue) throws IOException {
|
||||
switch (format) {
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
|
||||
case PACKED:
|
||||
switch (bitsPerValue) {
|
||||
case 8:
|
||||
return new Direct8(in, valueCount);
|
||||
case 16:
|
||||
return new Direct16(in, valueCount);
|
||||
case 24:
|
||||
return new Packed8ThreeBlocks(in, valueCount);
|
||||
case 32:
|
||||
return new Direct32(in, valueCount);
|
||||
case 48:
|
||||
return new Packed16ThreeBlocks(in, valueCount);
|
||||
case 64:
|
||||
return new Direct64(in, valueCount);
|
||||
default:
|
||||
return new Packed64(in, valueCount, bitsPerValue);
|
||||
case 24:
|
||||
if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
|
||||
return new Packed8ThreeBlocks(in, valueCount);
|
||||
}
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
|
||||
break;
|
||||
case 48:
|
||||
if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
|
||||
return new Packed16ThreeBlocks(in, valueCount);
|
||||
}
|
||||
break;
|
||||
}
|
||||
return new Packed64(in, valueCount, bitsPerValue);
|
||||
default:
|
||||
throw new AssertionError("Unknwown Writer format: " + format);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore a {@link Reader} from a stream.
|
||||
*
|
||||
* @param in the stream to read data from
|
||||
* @return a Reader
|
||||
* @throws IOException
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Reader getReader(DataInput in) throws IOException {
|
||||
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
final Format format = Format.byId(in.readVInt());
|
||||
|
||||
return getReaderNoHeader(in, format, version, valueCount, bitsPerValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Restore a {@link ReaderIterator} from a stream without reading
|
||||
* metadata at the beginning of the stream. This method is useful to restore
|
||||
* data from streams which have been created using
|
||||
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||
*
|
||||
* @param in the stream to read data from, positioned at the beginning of the packed values
|
||||
* @param format the format used to serialize
|
||||
* @param version the version used to serialize the data
|
||||
* @param valueCount how many values the stream holds
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
|
||||
* @return a ReaderIterator
|
||||
* @throws IOException
|
||||
* @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version,
|
||||
int valueCount, int bitsPerValue, int mem) throws IOException {
|
||||
return new PackedReaderIterator(format, valueCount, bitsPerValue, in, mem);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve PackedInts as a {@link ReaderIterator}
|
||||
* @param in positioned at the beginning of a stored packed int structure.
|
||||
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
|
||||
* @return an iterator to access the values
|
||||
* @throws IOException if the structure could not be retrieved.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
|
||||
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
|
||||
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
final int format = in.readVInt();
|
||||
switch (format) {
|
||||
case PACKED:
|
||||
return new PackedReaderIterator(valueCount, bitsPerValue, in);
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
|
||||
default:
|
||||
throw new AssertionError("Unknwown Writer format: " + format);
|
||||
}
|
||||
final Format format = Format.byId(in.readVInt());
|
||||
return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve PackedInts.Reader that does not load values
|
||||
* into RAM but rather accesses all values via the
|
||||
* provided IndexInput.
|
||||
* @param in positioned at the beginning of a stored packed int structure.
|
||||
* @return an Reader to access the values
|
||||
* @throws IOException if the structure could not be retrieved.
|
||||
* Expert: Construct a direct {@link Reader} from a stream without reading
|
||||
* metadata at the beginning of the stream. This method is useful to restore
|
||||
* data from streams which have been created using
|
||||
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||
* </p><p>
|
||||
* The returned reader will have very little memory overhead, but every call
|
||||
* to {@link Reader#get(int)} is likely to perform a disk seek.
|
||||
*
|
||||
* @param in the stream to read data from
|
||||
* @param format the format used to serialize
|
||||
* @param version the version used to serialize the data
|
||||
* @param valueCount how many values the stream holds
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @return a direct Reader
|
||||
* @throws IOException
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Reader getDirectReader(IndexInput in) throws IOException {
|
||||
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
final int format = in.readVInt();
|
||||
public static Reader getDirectReaderNoHeader(IndexInput in, Format format,
|
||||
int version, int valueCount, int bitsPerValue) throws IOException {
|
||||
switch (format) {
|
||||
case PACKED:
|
||||
return new DirectPackedReader(bitsPerValue, valueCount, in);
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
|
||||
default:
|
||||
throw new AssertionError("Unknwown Writer format: " + format);
|
||||
throw new AssertionError("Unknwown format: " + format);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a direct {@link Reader} from an {@link IndexInput}. This method
|
||||
* is useful to restore data from streams which have been created using
|
||||
* {@link PackedInts#getWriter(DataOutput, int, int, float)}.
|
||||
* </p><p>
|
||||
* The returned reader will have very little memory overhead, but every call
|
||||
* to {@link Reader#get(int)} is likely to perform a disk seek.
|
||||
*
|
||||
* @param in the stream to read data from
|
||||
* @return a direct Reader
|
||||
* @throws IOException
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Reader getDirectReader(IndexInput in) throws IOException {
|
||||
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
final Format format = Format.byId(in.readVInt());
|
||||
return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a packed integer array with the given amount of values initialized
|
||||
* to 0. The valueCount and the bitsPerValue cannot be changed after creation.
|
||||
* All Mutables known by this factory are kept fully in RAM.
|
||||
*
|
||||
* </p><p>
|
||||
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
||||
* for speed by selecting a faster but potentially less memory-efficient
|
||||
* implementation. An <code>acceptableOverheadRatio</code> of
|
||||
|
@ -433,103 +706,130 @@ public class PackedInts {
|
|||
*/
|
||||
public static Mutable getMutable(int valueCount,
|
||||
int bitsPerValue, float acceptableOverheadRatio) {
|
||||
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||
assert valueCount >= 0;
|
||||
|
||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||
|
||||
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
|
||||
switch (formatAndBits.format) {
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return Packed64SingleBlock.create(valueCount, formatAndBits.bitsPerValue);
|
||||
case PACKED:
|
||||
switch (formatAndBits.bitsPerValue) {
|
||||
case 8:
|
||||
return new Direct8(valueCount);
|
||||
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||
case 16:
|
||||
return new Direct16(valueCount);
|
||||
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||
case 32:
|
||||
return new Direct32(valueCount);
|
||||
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||
case 64:
|
||||
return new Direct64(valueCount);
|
||||
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||
case 24:
|
||||
if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
|
||||
return new Packed8ThreeBlocks(valueCount);
|
||||
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||
}
|
||||
break;
|
||||
case 48:
|
||||
if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
|
||||
return new Packed16ThreeBlocks(valueCount);
|
||||
} else {
|
||||
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
||||
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||
if (overhead <= acceptableOverhead) {
|
||||
return Packed64SingleBlock.create(valueCount, bpv);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return new Packed64(valueCount, bitsPerValue);
|
||||
return new Packed64(valueCount, formatAndBits.bitsPerValue);
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a packed integer array writer for the given number of values at the
|
||||
* given bits/value. Writers append to the given IndexOutput and has very
|
||||
* low memory overhead.
|
||||
* Expert: Create a packed integer array writer for the given output, format,
|
||||
* value count, and number of bits per value.
|
||||
* </p><p>
|
||||
* The resulting stream will be long-aligned. This means that depending on
|
||||
* the format which is used, up to 63 bits will be wasted. An easy way to
|
||||
* make sure that no space is lost is to always use a <code>valueCount</code>
|
||||
* that is a multiple of 64.
|
||||
* </p><p>
|
||||
* This method does not write any metadata to the stream, meaning that it is
|
||||
* your responsibility to store it somewhere else in order to be able to
|
||||
* recover data from the stream later on:
|
||||
* <ul>
|
||||
* <li><code>format</code> (using {@link Format#getId()}),</li>
|
||||
* <li><code>valueCount</code>,</li>
|
||||
* <li><code>bitsPerValue</code>,</li>
|
||||
* <li>{@link #VERSION_CURRENT}.</li>
|
||||
* </ul>
|
||||
* </p><p>
|
||||
* It is possible to start writing values without knowing how many of them you
|
||||
* are actually going to write. To do this, just pass <code>-1</code> as
|
||||
* <code>valueCount</code>. On the other hand, for any positive value of
|
||||
* <code>valueCount</code>, the returned writer will make sure that you don't
|
||||
* write more values than expected and pad the end of stream with zeros in
|
||||
* case you have written less than <code>valueCount</code> when calling
|
||||
* {@link Writer#finish()}.
|
||||
* </p><p>
|
||||
* The <code>mem</code> parameter lets you control how much memory can be used
|
||||
* to buffer changes in memory before flushing to disk. High values of
|
||||
* <code>mem</code> are likely to improve throughput. On the other hand, if
|
||||
* speed is not that important to you, a value of <code>0</code> will use as
|
||||
* little memory as possible and should already offer reasonable throughput.
|
||||
*
|
||||
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
||||
* @param out the data output
|
||||
* @param format the format to use to serialize the values
|
||||
* @param valueCount the number of values
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @param mem how much memory (in bytes) can be used to speed up serialization
|
||||
* @return a Writer
|
||||
* @throws IOException
|
||||
* @see PackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int)
|
||||
* @see PackedInts#getReaderNoHeader(DataInput, Format, int, int, int)
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Writer getWriterNoHeader(
|
||||
DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) throws IOException {
|
||||
return new PackedWriter(format, out, valueCount, bitsPerValue, mem);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a packed integer array writer for the given output, format, value
|
||||
* count, and number of bits per value.
|
||||
* </p><p>
|
||||
* The resulting stream will be long-aligned. This means that depending on
|
||||
* the format which is used under the hood, up to 63 bits will be wasted.
|
||||
* An easy way to make sure that no space is lost is to always use a
|
||||
* <code>valueCount</code> that is a multiple of 64.
|
||||
* </p><p>
|
||||
* This method writes metadata to the stream, so that the resulting stream is
|
||||
* sufficient to restore a {@link Reader} from it. You don't need to track
|
||||
* <code>valueCount</code> or <code>bitsPerValue</code> by yourself. In case
|
||||
* this is a problem, you should probably look at
|
||||
* {@link #getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||
* </p><p>
|
||||
* The <code>acceptableOverheadRatio</code> parameter controls how
|
||||
* readers that will be restored from this stream trade space
|
||||
* for speed by selecting a faster but potentially less memory-efficient
|
||||
* implementation. An <code>acceptableOverheadRatio</code> of
|
||||
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
|
||||
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
|
||||
* that the fastest implementation is selected.
|
||||
* that the fastest implementation is selected. In case you are only interested
|
||||
* in reading this stream sequentially later on, you should probably use
|
||||
* {@link PackedInts#COMPACT}.
|
||||
*
|
||||
* @param out the destination for the produced bits.
|
||||
* @param valueCount the number of elements.
|
||||
* @param bitsPerValue the number of bits available for any given value.
|
||||
* @param out the data output
|
||||
* @param valueCount the number of values
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @param acceptableOverheadRatio an acceptable overhead ratio per value
|
||||
* @return a Writer ready for receiving values.
|
||||
* @throws IOException if bits could not be written to out.
|
||||
* @return a Writer
|
||||
* @throws IOException
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Writer getWriter(DataOutput out,
|
||||
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
|
||||
throws IOException {
|
||||
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||
assert valueCount >= 0;
|
||||
|
||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||
|
||||
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||
return getWriterByFormat(out, valueCount, 8, PACKED);
|
||||
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||
return getWriterByFormat(out, valueCount, 16, PACKED);
|
||||
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||
return getWriterByFormat(out, valueCount, 32, PACKED);
|
||||
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||
return getWriterByFormat(out, valueCount, 64, PACKED);
|
||||
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||
return getWriterByFormat(out, valueCount, 24, PACKED);
|
||||
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||
return getWriterByFormat(out, valueCount, 48, PACKED);
|
||||
} else {
|
||||
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
||||
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||
if (overhead <= acceptableOverhead) {
|
||||
return getWriterByFormat(out, valueCount, bpv, PACKED_SINGLE_BLOCK);
|
||||
}
|
||||
}
|
||||
}
|
||||
return getWriterByFormat(out, valueCount, bitsPerValue, PACKED);
|
||||
}
|
||||
}
|
||||
|
||||
private static Writer getWriterByFormat(DataOutput out,
|
||||
int valueCount, int bitsPerValue, int format) throws IOException {
|
||||
switch (format) {
|
||||
case PACKED:
|
||||
return new PackedWriter(out, valueCount, bitsPerValue);
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return new Packed64SingleBlockWriter(out, valueCount, bitsPerValue);
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown format " + format);
|
||||
}
|
||||
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
|
||||
final Writer writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE);
|
||||
writer.writeHeader();
|
||||
return writer;
|
||||
}
|
||||
|
||||
/** Returns how many bits are required to hold values up
|
||||
|
|
|
@ -17,76 +17,71 @@ package org.apache.lucene.util.packed;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
|
||||
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
|
||||
private long pending;
|
||||
private int pendingBitsLeft;
|
||||
private int position = -1;
|
||||
|
||||
// masks[n-1] masks for bottom n bits
|
||||
private final long[] masks;
|
||||
final PackedInts.Format format;
|
||||
final BulkOperation bulkOperation;
|
||||
final long[] nextBlocks;
|
||||
final LongsRef nextValues;
|
||||
final int iterations;
|
||||
int position;
|
||||
|
||||
public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in) {
|
||||
PackedReaderIterator(PackedInts.Format format, int valueCount, int bitsPerValue, DataInput in, int mem) {
|
||||
super(valueCount, bitsPerValue, in);
|
||||
|
||||
masks = new long[bitsPerValue];
|
||||
|
||||
long v = 1;
|
||||
for (int i = 0; i < bitsPerValue; i++) {
|
||||
v *= 2;
|
||||
masks[i] = v - 1;
|
||||
}
|
||||
this.format = format;
|
||||
bulkOperation = BulkOperation.of(format, bitsPerValue);
|
||||
iterations = bulkOperation.computeIterations(valueCount, mem);
|
||||
assert iterations > 0;
|
||||
nextBlocks = new long[iterations * bulkOperation.blocks()];
|
||||
nextValues = new LongsRef(new long[iterations * bulkOperation.values()], 0, 0);
|
||||
assert iterations * bulkOperation.values() == nextValues.longs.length;
|
||||
assert iterations * bulkOperation.blocks() == nextBlocks.length;
|
||||
nextValues.offset = nextValues.longs.length;
|
||||
position = -1;
|
||||
}
|
||||
|
||||
public long next() throws IOException {
|
||||
if (pendingBitsLeft == 0) {
|
||||
pending = in.readLong();
|
||||
pendingBitsLeft = 64;
|
||||
@Override
|
||||
public LongsRef next(int count) throws IOException {
|
||||
assert nextValues.length >= 0;
|
||||
assert count > 0;
|
||||
assert nextValues.offset + nextValues.length <= nextValues.longs.length;
|
||||
|
||||
nextValues.offset += nextValues.length;
|
||||
|
||||
final int remaining = valueCount - position - 1;
|
||||
if (remaining <= 0) {
|
||||
throw new EOFException();
|
||||
}
|
||||
count = Math.min(remaining, count);
|
||||
|
||||
if (nextValues.offset == nextValues.longs.length) {
|
||||
final int remainingBlocks = format.nblocks(bitsPerValue, remaining);
|
||||
final int blocksToRead = Math.min(remainingBlocks, nextBlocks.length);
|
||||
for (int i = 0; i < blocksToRead; ++i) {
|
||||
nextBlocks[i] = in.readLong();
|
||||
}
|
||||
for (int i = blocksToRead; i < nextBlocks.length; ++i) {
|
||||
nextBlocks[i] = 0L;
|
||||
}
|
||||
|
||||
final long result;
|
||||
if (pendingBitsLeft >= bitsPerValue) { // not split
|
||||
result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
|
||||
pendingBitsLeft -= bitsPerValue;
|
||||
} else { // split
|
||||
final int bits1 = bitsPerValue - pendingBitsLeft;
|
||||
final long result1 = (pending & masks[pendingBitsLeft-1]) << bits1;
|
||||
pending = in.readLong();
|
||||
final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
|
||||
pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
|
||||
result = result1 | result2;
|
||||
bulkOperation.get(nextBlocks, 0, nextValues.longs, 0, iterations);
|
||||
nextValues.offset = 0;
|
||||
}
|
||||
|
||||
++position;
|
||||
return result;
|
||||
nextValues.length = Math.min(nextValues.longs.length - nextValues.offset, count);
|
||||
position += nextValues.length;
|
||||
return nextValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ord() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public long advance(final int ord) throws IOException{
|
||||
assert ord < valueCount : "ord must be less than valueCount";
|
||||
assert ord > position : "ord must be greater than the current position";
|
||||
final long bits = (long) bitsPerValue;
|
||||
final int posToSkip = ord - 1 - position;
|
||||
final long bitsToSkip = (bits * (long)posToSkip);
|
||||
if (bitsToSkip < pendingBitsLeft) { // enough bits left - no seek required
|
||||
pendingBitsLeft -= bitsToSkip;
|
||||
} else {
|
||||
final long skip = bitsToSkip-pendingBitsLeft;
|
||||
final long closestByte = (skip >> 6) << 3;
|
||||
if (closestByte != 0) { // need to seek
|
||||
final long filePointer = in.getFilePointer();
|
||||
in.seek(filePointer + closestByte);
|
||||
}
|
||||
pending = in.readLong();
|
||||
pendingBitsLeft = 64 - (int)(skip % 64);
|
||||
}
|
||||
position = ord-1;
|
||||
return next();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,101 +19,79 @@ package org.apache.lucene.util.packed;
|
|||
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
|
||||
// Packs high order byte first, to match
|
||||
// IndexOutput.writeInt/Long/Short byte order
|
||||
|
||||
/**
|
||||
* Generic writer for space-optimal packed values. The resulting bits can be
|
||||
* used directly by Packed32, Packed64 and PackedDirect* and will always be
|
||||
* long-aligned.
|
||||
*/
|
||||
final class PackedWriter extends PackedInts.Writer {
|
||||
|
||||
class PackedWriter extends PackedInts.Writer {
|
||||
private long pending;
|
||||
private int pendingBitPos;
|
||||
boolean finished;
|
||||
final PackedInts.Format format;
|
||||
final BulkOperation bulkOperation;
|
||||
final long[] nextBlocks;
|
||||
final long[] nextValues;
|
||||
final int iterations;
|
||||
int off;
|
||||
int written;
|
||||
|
||||
// masks[n-1] masks for bottom n bits
|
||||
private final long[] masks;
|
||||
private int written = 0;
|
||||
|
||||
public PackedWriter(DataOutput out, int valueCount, int bitsPerValue)
|
||||
PackedWriter(PackedInts.Format format, DataOutput out, int valueCount, int bitsPerValue, int mem)
|
||||
throws IOException {
|
||||
super(out, valueCount, bitsPerValue);
|
||||
|
||||
pendingBitPos = 64;
|
||||
masks = new long[bitsPerValue - 1];
|
||||
|
||||
long v = 1;
|
||||
for (int i = 0; i < bitsPerValue - 1; i++) {
|
||||
v *= 2;
|
||||
masks[i] = v - 1;
|
||||
}
|
||||
this.format = format;
|
||||
bulkOperation = BulkOperation.of(format, bitsPerValue);
|
||||
iterations = bulkOperation.computeIterations(valueCount, mem);
|
||||
nextBlocks = new long[iterations * bulkOperation.blocks()];
|
||||
nextValues = new long[iterations * bulkOperation.values()];
|
||||
off = 0;
|
||||
written = 0;
|
||||
finished = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getFormat() {
|
||||
return PackedInts.PACKED;
|
||||
protected PackedInts.Format getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not call this after finish
|
||||
*/
|
||||
@Override
|
||||
public void add(long v) throws IOException {
|
||||
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
|
||||
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
|
||||
assert v >= 0;
|
||||
//System.out.println(" packedw add v=" + v + " pendingBitPos=" + pendingBitPos);
|
||||
|
||||
// TODO
|
||||
if (pendingBitPos >= bitsPerValue) {
|
||||
// not split
|
||||
|
||||
// write-once, so we can |= w/o first masking to 0s
|
||||
pending |= v << (pendingBitPos - bitsPerValue);
|
||||
if (pendingBitPos == bitsPerValue) {
|
||||
// flush
|
||||
out.writeLong(pending);
|
||||
pending = 0;
|
||||
pendingBitPos = 64;
|
||||
} else {
|
||||
pendingBitPos -= bitsPerValue;
|
||||
assert v >= 0 && v <= PackedInts.maxValue(bitsPerValue);
|
||||
assert !finished;
|
||||
if (valueCount != -1 && written >= valueCount) {
|
||||
throw new EOFException("Writing past end of stream");
|
||||
}
|
||||
|
||||
} else {
|
||||
// split
|
||||
|
||||
// write top pendingBitPos bits of value into bottom bits of pending
|
||||
pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1];
|
||||
//System.out.println(" part1 (v >> " + (bitsPerValue - pendingBitPos) + ") & " + masks[pendingBitPos-1]);
|
||||
|
||||
// flush
|
||||
out.writeLong(pending);
|
||||
|
||||
// write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending
|
||||
pendingBitPos = 64 - bitsPerValue + pendingBitPos;
|
||||
//System.out.println(" part2 v << " + pendingBitPos);
|
||||
pending = (v << pendingBitPos);
|
||||
nextValues[off++] = v;
|
||||
if (off == nextValues.length) {
|
||||
flush(nextValues.length);
|
||||
off = 0;
|
||||
}
|
||||
written++;
|
||||
++written;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() throws IOException {
|
||||
assert !finished;
|
||||
if (valueCount != -1) {
|
||||
while (written < valueCount) {
|
||||
add(0L); // Auto flush
|
||||
add(0L);
|
||||
}
|
||||
}
|
||||
flush(off);
|
||||
finished = true;
|
||||
}
|
||||
|
||||
if (pendingBitPos != 64) {
|
||||
out.writeLong(pending);
|
||||
private void flush(int nvalues) throws IOException {
|
||||
bulkOperation.set(nextBlocks, 0, nextValues, 0, iterations);
|
||||
final int blocks = format.nblocks(bitsPerValue, nvalues);
|
||||
for (int i = 0; i < blocks; ++i) {
|
||||
out.writeLong(nextBlocks[i]);
|
||||
}
|
||||
off = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "PackedWriter(written " + written + "/" + valueCount + " with "
|
||||
+ bitsPerValue + " bits/value)";
|
||||
public int ord() {
|
||||
return written - 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,264 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from fractions import gcd
|
||||
|
||||
"""Code generation for bulk operations"""
|
||||
|
||||
PACKED_64_SINGLE_BLOCK_BPV = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
|
||||
OUTPUT_FILE = "BulkOperation.java"
|
||||
HEADER = """// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.EnumMap;
|
||||
|
||||
/**
|
||||
* Efficient sequential read/write of packed integers.
|
||||
*/
|
||||
abstract class BulkOperation {
|
||||
|
||||
static final EnumMap<PackedInts.Format, BulkOperation[]> BULK_OPERATIONS = new EnumMap<PackedInts.Format, BulkOperation[]>(PackedInts.Format.class);
|
||||
|
||||
public static BulkOperation of(PackedInts.Format format, int bitsPerValue) {
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64;
|
||||
BulkOperation[] ops = BULK_OPERATIONS.get(format);
|
||||
if (ops == null || ops[bitsPerValue] == null) {
|
||||
throw new IllegalArgumentException("format: " + format + ", bitsPerValue: " + bitsPerValue);
|
||||
}
|
||||
return ops[bitsPerValue];
|
||||
}
|
||||
|
||||
/**
|
||||
* For every number of bits per value, there is a minimum number of
|
||||
* blocks (b) / values (v) you need to write in order to reach the next block
|
||||
* boundary:
|
||||
* - 16 bits per value -> b=1, v=4
|
||||
* - 24 bits per value -> b=3, v=8
|
||||
* - 50 bits per value -> b=25, v=32
|
||||
* - 63 bits per value -> b=63, v = 64
|
||||
* - ...
|
||||
*
|
||||
* A bulk read consists in copying <code>iterations*v</code> values that are
|
||||
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
|
||||
* (higher values of <code>iterations</code> are likely to yield a better
|
||||
* throughput) => this requires n * (b + v) longs in memory.
|
||||
*
|
||||
* This method computes <code>iterations</code> as
|
||||
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
|
||||
*/
|
||||
public final int computeIterations(int valueCount, int ramBudget) {
|
||||
final int iterations = (ramBudget >>> 3) / (blocks() + values());
|
||||
if (iterations == 0) {
|
||||
// at least 1
|
||||
return 1;
|
||||
} else if ((iterations - 1) * blocks() >= valueCount) {
|
||||
// don't allocate for more than the size of the reader
|
||||
return (int) Math.ceil((double) valueCount / values());
|
||||
} else {
|
||||
return iterations;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The minimum number of blocks required to perform a bulk get/set.
|
||||
*/
|
||||
public abstract int blocks();
|
||||
|
||||
/**
|
||||
* The number of values that can be stored in <code>blocks()</code> blocks.
|
||||
*/
|
||||
public abstract int values();
|
||||
|
||||
/**
|
||||
* Get <code>n * values()</code> values from <code>n * blocks()</code> blocks.
|
||||
*/
|
||||
public abstract void get(long[] blocks, int blockIndex, long[] values, int valuesIndex, int iterations);
|
||||
|
||||
/**
|
||||
* Set <code>n * values()</code> values into <code>n * blocks()</code> blocks.
|
||||
*/
|
||||
public abstract void set(long[] blocks, int blockIndex, long[] values, int valuesIndex, int iterations);
|
||||
|
||||
"""
|
||||
|
||||
FOOTER = "}"
|
||||
|
||||
def packed64singleblock(bpv, f):
  """Write the Java source of the Packed64SingleBlock bulk operation for `bpv`.

  In this layout a single 64-bit block holds `64 // bpv` values and no value
  spans two blocks. The generated class is written to `f`, which only needs
  to provide a `write(str)` method.
  """
  # Floor division keeps `values` an int under both Python 2 and Python 3.
  values = 64 // bpv
  mask = (1 << bpv) - 1

  f.write("\n static final class Packed64SingleBlockBulkOperation%d extends BulkOperation {\n\n" %bpv)
  f.write(" public int blocks() {\n")
  f.write(" return 1;\n")
  f.write(" }\n\n")
  f.write(" public int values() {\n")
  f.write(" return %d;\n" %values)
  f.write(" }\n\n")

  # get(): unpack every value of a block, lowest bits first
  f.write(" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  f.write(" final long block = blocks[bi++];\n")
  for i in range(values):
    if i == 0:
      # first value: no shift required
      f.write(" values[vi++] = block & %dL;\n" %mask)
    elif i == values - 1:
      # last value: the generated code shifts without masking
      f.write(" values[vi++] = block >>> %d;\n" %(i * bpv))
    else:
      f.write(" values[vi++] = (block >>> %d) & %dL;\n" %(i * bpv, mask))
  f.write(" }\n")
  f.write(" }\n\n")

  # set(): OR all the values of a block together into a single statement
  f.write(" public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
  f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
  f.write(" assert vi + iterations * values() <= values.length;\n")
  f.write(" for (int i = 0; i < iterations; ++i) {\n")
  for i in range(values):
    if i == 0:
      f.write(" blocks[bi++] = values[vi++]")
    else:
      f.write(" | (values[vi++] << %d)" %(i * bpv))
    if i == values - 1:
      # terminate the statement after the last value
      f.write(";\n")
  f.write(" }\n")
  f.write(" }\n")

  f.write(" }\n")
|
||||
|
||||
def packed64(bpv, f):
    """Write the Java source of ``Packed64BulkOperation<bpv>`` to ``f``.

    The generated nested class decodes (``get``) and encodes (``set``)
    values of ``bpv`` bits that are stored back to back in 64-bit blocks,
    the first value of a block sitting in its highest-order bits.

    bpv -- bits per value.
    f   -- writable text stream that receives the generated Java.
    """
    # One generated loop iteration handles the smallest group that ends on a
    # block boundary: `blocks` longs holding `values` values, which means
    # values * bpv == 64 * blocks.
    blocks = bpv
    values = blocks * 64 // bpv
    while blocks % 2 == 0 and values % 2 == 0:
        blocks //= 2
        values //= 2
    assert values * bpv == 64 * blocks, "%d values, %d blocks, %d bits per value" %(values, blocks, bpv)
    mask = (1 << bpv) - 1

    f.write("  static final class Packed64BulkOperation%d extends BulkOperation {\n\n" %bpv)
    f.write("    public int blocks() {\n")
    f.write("      return %d;\n" %blocks)
    f.write("    }\n\n")
    f.write("    public int values() {\n")
    f.write("      return %d;\n" %values)
    f.write("    }\n\n")

    if bpv == 64:
        # Every value is a whole block, so both directions are plain copies.
        # Each array must be paired with its own offset: blocks/bi, values/vi.
        f.write("""    public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {
      System.arraycopy(blocks, bi, values, vi, iterations);
    }

    public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {
      System.arraycopy(values, vi, blocks, bi, iterations);
    }
  }
""")
        return

    f.write("    public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
    f.write("      assert bi + iterations * blocks() <= blocks.length;\n")
    f.write("      assert vi + iterations * values() <= values.length;\n")
    f.write("      for (int i = 0; i < iterations; ++i) {\n")
    for i in range(0, values):
        block_offset = i * bpv // 64
        bit_offset = (i * bpv) % 64
        if bit_offset == 0:
            # start of block: load it, the value is in the top bpv bits
            f.write("        final long block%d = blocks[bi++];\n" %block_offset)
            f.write("        values[vi++] = block%d >>> %d;\n" %(block_offset, 64 - bpv))
        elif bit_offset + bpv == 64:
            # end of block: the value is in the low bpv bits
            f.write("        values[vi++] = block%d & %dL;\n" %(block_offset, mask))
        elif bit_offset + bpv < 64:
            # middle of block
            f.write("        values[vi++] = (block%d >>> %d) & %dL;\n" %(block_offset, 64 - bit_offset - bpv, mask))
        else:
            # value spans across 2 blocks: low bits of the current block
            # followed by the high bits of the next one
            mask1 = (1 << (64 - bit_offset)) - 1
            shift1 = bit_offset + bpv - 64
            shift2 = 64 - shift1
            f.write("        final long block%d = blocks[bi++];\n" %(block_offset + 1))
            f.write("        values[vi++] = ((block%d & %dL) << %d) | (block%d >>> %d);\n" %(block_offset, mask1, shift1, block_offset + 1, shift2))
    f.write("      }\n")
    f.write("    }\n\n")

    f.write("    public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
    f.write("      assert bi + iterations * blocks() <= blocks.length;\n")
    f.write("      assert vi + iterations * values() <= values.length;\n")
    f.write("      for (int i = 0; i < iterations; ++i) {\n")
    for i in range(0, values):
        block_offset = i * bpv // 64
        bit_offset = (i * bpv) % 64
        if bit_offset == 0:
            # start of block: opens a new Java assignment statement
            f.write("        blocks[bi++] = (values[vi++] << %d)" %(64 - bpv))
        elif bit_offset + bpv == 64:
            # end of block: closes the current Java statement
            f.write(" | values[vi++];\n")
        elif bit_offset + bpv < 64:
            # inside a block
            f.write(" | (values[vi++] << %d)" %(64 - bit_offset - bpv))
        else:
            # value spans across 2 blocks: its high bits finish the current
            # block, its low right_bits bits start the next one
            right_bits = bit_offset + bpv - 64
            f.write(" | (values[vi] >>> %d);\n" %right_bits)
            f.write("        blocks[bi++] = (values[vi++] << %d)" %(64 - right_bits))
    f.write("      }\n")
    f.write("    }\n")

    f.write("  }\n\n")
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Regenerate the Java source file: header, static registration table,
    # one Packed64BulkOperation class per bits-per-value in 1..64, one
    # Packed64SingleBlockBulkOperation class per supported value, footer.
    # The with-block closes the output file even if generation fails.
    with open(OUTPUT_FILE, 'w') as f:
        f.write(HEADER)
        f.write("  static {\n")
        # Index 0 of each 65-slot array is never assigned below.
        f.write("    BULK_OPERATIONS.put(PackedInts.Format.PACKED, new BulkOperation[65]);\n")
        for bpv in range(1, 65):
            f.write("    BULK_OPERATIONS.get(PackedInts.Format.PACKED)[%d] = new Packed64BulkOperation%d();\n" %(bpv, bpv))
        f.write("    BULK_OPERATIONS.put(PackedInts.Format.PACKED_SINGLE_BLOCK, new BulkOperation[65]);\n")
        for bpv in PACKED_64_SINGLE_BLOCK_BPV:
            f.write("    BULK_OPERATIONS.get(PackedInts.Format.PACKED_SINGLE_BLOCK)[%d] = new Packed64SingleBlockBulkOperation%d();\n" %(bpv, bpv))
        f.write("  }\n")
        for bpv in range(1, 65):
            packed64(bpv, f)
        for bpv in PACKED_64_SINGLE_BLOCK_BPV:
            packed64singleblock(bpv, f)
        f.write(FOOTER)
|
|
@ -0,0 +1,175 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
HEADER="""// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
"""
|
||||
|
||||
# Java primitive type backing the array, keyed by bits per value.
TYPES = {8: "byte", 16: "short", 32: "int", 64: "long"}
# Suffix appended to an array read to widen it to a long without sign extension.
MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
# Prefix that narrows a long to the backing primitive type.
CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}


def write_direct_class(bpv, f):
    """Write the ``Direct<bpv>`` Java class (everything after the file header).

    bpv -- bits per value; must be a key of TYPES.
    f   -- writable text stream that receives the generated Java.
    """
    java_type = TYPES[bpv]
    reader = "in.read%s()" % java_type.title()
    per_long = 64 // bpv  # number of values that fit in one 64-bit word

    f.write("""/**
 * Direct wrapping of %d-bits values to a backing array.
 * @lucene.internal
 */\n""" %bpv)
    f.write("final class Direct%d extends PackedInts.MutableImpl {\n" %bpv)
    f.write("  final %s[] values;\n\n" %java_type)

    f.write("  Direct%d(int valueCount) {\n" %bpv)
    f.write("    super(valueCount, %d);\n" %bpv)
    f.write("    values = new %s[valueCount];\n" %java_type)
    f.write("  }\n\n")

    f.write("  Direct%d(DataInput in, int valueCount) throws IOException {\n" %bpv)
    f.write("    this(valueCount);\n")
    f.write("    for (int i = 0; i < valueCount; ++i) {\n")
    f.write("      values[i] = %s;\n" %reader)
    f.write("    }\n")
    if bpv != 64:
        # The generated constructor reads and discards extra values so that
        # the total number consumed is a multiple of per_long, i.e. a whole
        # number of 64-bit words.
        f.write("    final int mod = valueCount %% %d;\n" %per_long)
        f.write("    if (mod != 0) {\n")
        f.write("      for (int i = mod; i < %d; ++i) {\n" %per_long)
        f.write("        %s;\n" %reader)
        f.write("      }\n")
        f.write("    }\n")
    f.write("  }\n")

    f.write("""
  @Override
  public long get(final int index) {
    return values[index]%s;
  }

  public void set(final int index, final long value) {
    values[index] = %s(value);
  }

  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(values);
  }

  public void clear() {
    Arrays.fill(values, %s0L);
  }

  @Override
  public Object getArray() {
    return values;
  }

  @Override
  public boolean hasArray() {
    return true;
  }
""" %(MASKS[bpv], CASTS[bpv], CASTS[bpv]))

    if bpv == 64:
        # long[] backing array: bulk operations are straight array copies.
        f.write("""
  @Override
  public int get(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;

    final int gets = Math.min(valueCount - index, len);
    System.arraycopy(values, index, arr, off, gets);
    return gets;
  }

  public int set(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;

    final int sets = Math.min(valueCount - index, len);
    System.arraycopy(arr, off, values, index, sets);
    return sets;
  }

  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    Arrays.fill(values, fromIndex, toIndex, val);
  }
""")
    else:
        # Narrower backing array: convert element by element.
        f.write("""
  @Override
  public int get(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;

    final int gets = Math.min(valueCount - index, len);
    for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
      arr[o] = values[i]%s;
    }
    return gets;
  }

  public int set(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;

    final int sets = Math.min(valueCount - index, len);
    for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
      values[i] = %sarr[o];
    }
    return sets;
  }

  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    assert val == (val%s);
    Arrays.fill(values, fromIndex, toIndex, %sval);
  }
""" %(MASKS[bpv], CASTS[bpv], MASKS[bpv], CASTS[bpv]))

    f.write("}\n")


if __name__ == '__main__':
    # One Direct<bpv>.java file per supported width, written to the current
    # directory; the with-block closes each file even if generation fails.
    for bpv in TYPES.keys():
        with open("Direct%d.java" %bpv, 'w') as f:
            f.write(HEADER)
            write_direct_class(bpv, f)
|
|
@ -58,12 +58,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
|||
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
|
||||
}
|
||||
|
||||
public static float overheadPerValue(int bitsPerValue) {
|
||||
int valuesPerBlock = 64 / bitsPerValue;
|
||||
int overhead = 64 %% bitsPerValue;
|
||||
return (float) overhead / valuesPerBlock;
|
||||
}
|
||||
|
||||
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
|
||||
return valueCount / valuesPerBlock
|
||||
+ (valueCount %% valuesPerBlock == 0 ? 0 : 1);
|
||||
|
@ -111,16 +105,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
|||
|
||||
// bulk get
|
||||
assert index %% valuesPerBlock == 0;
|
||||
final long readMask = (1L << bitsPerValue) - 1;
|
||||
final int startBlock = index / valuesPerBlock;
|
||||
final int endBlock = (index + len) / valuesPerBlock;
|
||||
final int diff = (endBlock - startBlock) * valuesPerBlock;
|
||||
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||
assert op.blocks() == 1;
|
||||
assert op.values() == valuesPerBlock;
|
||||
final int blockIndex = index / valuesPerBlock;
|
||||
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||
op.get(blocks, blockIndex, arr, off, nblocks);
|
||||
final int diff = nblocks * valuesPerBlock;
|
||||
index += diff; len -= diff;
|
||||
for (int block = startBlock; block < endBlock; ++block) {
|
||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
||||
arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
|
||||
}
|
||||
}
|
||||
|
||||
if (index > originalIndex) {
|
||||
// stay at the block boundary
|
||||
|
@ -157,17 +149,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
|||
|
||||
// bulk set
|
||||
assert index %% valuesPerBlock == 0;
|
||||
final int startBlock = index / valuesPerBlock;
|
||||
final int endBlock = (index + len) / valuesPerBlock;
|
||||
final int diff = (endBlock - startBlock) * valuesPerBlock;
|
||||
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||
assert op.blocks() == 1;
|
||||
assert op.values() == valuesPerBlock;
|
||||
final int blockIndex = index / valuesPerBlock;
|
||||
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||
op.set(blocks, blockIndex, arr, off, nblocks);
|
||||
final int diff = nblocks * valuesPerBlock;
|
||||
index += diff; len -= diff;
|
||||
for (int block = startBlock; block < endBlock; ++block) {
|
||||
long next = 0L;
|
||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
||||
next |= (arr[off++] << (i * bitsPerValue));
|
||||
}
|
||||
blocks[block] = next;
|
||||
}
|
||||
|
||||
if (index > originalIndex) {
|
||||
// stay at the block boundary
|
||||
|
@ -221,8 +210,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected int getFormat() {
|
||||
return PackedInts.PACKED_SINGLE_BLOCK;
|
||||
protected PackedInts.Format getFormat() {
|
||||
return PackedInts.Format.PACKED_SINGLE_BLOCK;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,161 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
HEADER="""// This file has been automatically generated, DO NOT EDIT
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
"""
|
||||
|
||||
# Java primitive type of one block, keyed by bits per block.
TYPES = {8: "byte", 16: "short"}
# Suffix appended to a block read to widen it to a long without sign extension.
MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
# Prefix that narrows a long to the block's primitive type.
CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}


def write_three_blocks_class(bpv, f):
    """Write the ``Packed<bpv>ThreeBlocks`` Java class (everything after the file header).

    Each value is stored as three consecutive blocks of ``bpv`` bits, most
    significant block first, giving 3 * bpv bits per value.

    bpv -- bits per block; must be a key of TYPES.
    f   -- writable text stream that receives the generated Java.
    """
    java_type = TYPES[bpv]
    reader = "in.read%s()" % java_type.title()
    per_long = 64 // bpv  # number of blocks that fit in one 64-bit word

    f.write("""/**
 * Packs integers into 3 %ss (%d bits per value).
 * @lucene.internal
 */\n""" %(java_type, bpv*3))
    f.write("final class Packed%dThreeBlocks extends PackedInts.MutableImpl {\n" %bpv)
    f.write("  final %s[] blocks;\n\n" %java_type)

    f.write("  public static final int MAX_SIZE = Integer.MAX_VALUE / 3;\n\n")

    f.write("  Packed%dThreeBlocks(int valueCount) {\n" %bpv)
    f.write("    super(valueCount, %d);\n" %(bpv*3))
    f.write("    if (valueCount > MAX_SIZE) {\n")
    f.write("      throw new ArrayIndexOutOfBoundsException(\"MAX_SIZE exceeded\");\n")
    f.write("    }\n")
    f.write("    blocks = new %s[valueCount * 3];\n" %java_type)
    f.write("  }\n\n")

    f.write("  Packed%dThreeBlocks(DataInput in, int valueCount) throws IOException {\n" %bpv)
    f.write("    this(valueCount);\n")
    f.write("    for (int i = 0; i < 3 * valueCount; ++i) {\n")
    f.write("      blocks[i] = %s;\n" %reader)
    f.write("    }\n")
    # The generated constructor reads and discards extra blocks so that the
    # total number consumed is a multiple of per_long, i.e. a whole number of
    # 64-bit words.
    f.write("    final int mod = blocks.length %% %d;\n" %per_long)
    f.write("    if (mod != 0) {\n")
    f.write("      for (int i = mod; i < %d; ++i) {\n" %per_long)
    f.write("        %s;\n" %reader)
    f.write("      }\n")
    f.write("    }\n")
    f.write("  }\n")

    # Named placeholders keep the long template readable:
    #   mask/cast/type -- per-width snippets from MASKS/CASTS/TYPES
    #   hi/lo          -- shift of the first / second block within a value
    f.write("""
  @Override
  public long get(int index) {
    final int o = index * 3;
    return (blocks[o]%(mask)s) << %(hi)d | (blocks[o+1]%(mask)s) << %(lo)d | (blocks[o+2]%(mask)s);
  }

  @Override
  public int get(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;

    final int gets = Math.min(valueCount - index, len);
    for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
      arr[off++] = (blocks[i]%(mask)s) << %(hi)d | (blocks[i+1]%(mask)s) << %(lo)d | (blocks[i+2]%(mask)s);
    }
    return gets;
  }

  @Override
  public void set(int index, long value) {
    final int o = index * 3;
    blocks[o] = %(cast)s(value >>> %(hi)d);
    blocks[o+1] = %(cast)s(value >>> %(lo)d);
    blocks[o+2] = %(cast)svalue;
  }

  @Override
  public int set(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;

    final int sets = Math.min(valueCount - index, len);
    for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
      final long value = arr[i];
      blocks[o++] = %(cast)s(value >>> %(hi)d);
      blocks[o++] = %(cast)s(value >>> %(lo)d);
      blocks[o++] = %(cast)svalue;
    }
    return sets;
  }

  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    final %(type)s block1 = %(cast)s(val >>> %(hi)d);
    final %(type)s block2 = %(cast)s(val >>> %(lo)d);
    final %(type)s block3 = %(cast)sval;
    for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
      blocks[i] = block1;
      blocks[i+1] = block2;
      blocks[i+2] = block3;
    }
  }

  @Override
  public void clear() {
    Arrays.fill(blocks, %(cast)s0);
  }

  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(blocks);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
        + ", size=" + size() + ", elements.length=" + blocks.length + ")";
  }
}
""" %{"mask": MASKS[bpv], "cast": CASTS[bpv], "type": java_type,
      "hi": 2*bpv, "lo": bpv})


if __name__ == '__main__':
    # One Packed<bpv>ThreeBlocks.java file per supported block width, written
    # to the current directory; the with-block closes each file even if
    # generation fails.
    for bpv in TYPES.keys():
        with open("Packed%dThreeBlocks.java" %bpv, 'w') as f:
            f.write(HEADER)
            write_three_blocks_class(bpv, f)
|
|
@ -19,9 +19,16 @@
|
|||
<head></head>
|
||||
<body bgcolor="white">
|
||||
|
||||
<p>Packed integer arrays and streams.</p>
|
||||
|
||||
<p>
|
||||
The packed package provides random access capable arrays of positive longs.
|
||||
The implementations provides different trade offs between memory usage and
|
||||
The packed package provides
|
||||
<ul>
|
||||
<li>sequential and random access capable arrays of positive longs,</li>
|
||||
<li>routines for efficient serialization and deserialization of streams of packed integers.</li>
|
||||
</ul>
|
||||
|
||||
The implementations provide different trade-offs between memory usage and
|
||||
access speed. The standard usage scenario is replacing large int or long
|
||||
arrays in order to reduce the memory footprint.
|
||||
</p><p>
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Random;
|
|||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
|
@ -57,6 +58,9 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
for(int nbits=1;nbits<=64;nbits++) {
|
||||
final long maxValue = PackedInts.maxValue(nbits);
|
||||
final int valueCount = 100+random().nextInt(500);
|
||||
final int bufferSize = random().nextBoolean()
|
||||
? _TestUtil.nextInt(random(), 0, 48)
|
||||
: _TestUtil.nextInt(random(), 0, 4096);
|
||||
final Directory d = newDirectory();
|
||||
|
||||
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
|
||||
|
@ -79,10 +83,10 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
// ensure that finish() added the (valueCount-actualValueCount) missing values
|
||||
final long bytes;
|
||||
switch (w.getFormat()) {
|
||||
case PackedInts.PACKED:
|
||||
case PACKED:
|
||||
bytes = (long) Math.ceil((double) valueCount * w.bitsPerValue / 64) << 3;
|
||||
break;
|
||||
case PackedInts.PACKED_SINGLE_BLOCK:
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
final int valuesPerBlock = 64 / w.bitsPerValue;
|
||||
bytes = (long) Math.ceil((double) valueCount / valuesPerBlock) << 3;
|
||||
break;
|
||||
|
@ -97,7 +101,7 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
CodecUtil.checkHeader(in, PackedInts.CODEC_NAME, PackedInts.VERSION_START, PackedInts.VERSION_CURRENT); // codec header
|
||||
assertEquals(w.bitsPerValue, in.readVInt());
|
||||
assertEquals(valueCount, in.readVInt());
|
||||
assertEquals(w.getFormat(), in.readVInt());
|
||||
assertEquals(w.getFormat().getId(), in.readVInt());
|
||||
assertEquals(startFp, in.getFilePointer());
|
||||
in.close();
|
||||
}
|
||||
|
@ -113,37 +117,34 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
}
|
||||
in.close();
|
||||
}
|
||||
|
||||
{ // test reader iterator next
|
||||
IndexInput in = d.openInput("out.bin", newIOContext(random()));
|
||||
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in);
|
||||
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
|
||||
for(int i=0;i<valueCount;i++) {
|
||||
assertEquals("index=" + i + " valueCount="
|
||||
+ valueCount + " nbits=" + nbits + " for "
|
||||
+ r.getClass().getSimpleName(), values[i], r.next());
|
||||
assertEquals(i, r.ord());
|
||||
}
|
||||
assertEquals(fp, in.getFilePointer());
|
||||
in.close();
|
||||
}
|
||||
{ // test reader iterator next vs. advance
|
||||
|
||||
{ // test reader iterator bulk next
|
||||
IndexInput in = d.openInput("out.bin", newIOContext(random()));
|
||||
PackedInts.ReaderIterator intsEnum = PackedInts.getReaderIterator(in);
|
||||
for (int i = 0; i < valueCount; i +=
|
||||
1 + ((valueCount - i) <= 20 ? random().nextInt(valueCount - i)
|
||||
: random().nextInt(20))) {
|
||||
final String msg = "index=" + i + " valueCount="
|
||||
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
|
||||
int i = 0;
|
||||
while (i < valueCount) {
|
||||
final int count = _TestUtil.nextInt(random(), 1, 95);
|
||||
final LongsRef next = r.next(count);
|
||||
for (int k = 0; k < next.length; ++k) {
|
||||
assertEquals("index=" + i + " valueCount="
|
||||
+ valueCount + " nbits=" + nbits + " for "
|
||||
+ intsEnum.getClass().getSimpleName();
|
||||
if (i - intsEnum.ord() == 1 && random().nextBoolean()) {
|
||||
assertEquals(msg, values[i], intsEnum.next());
|
||||
} else {
|
||||
assertEquals(msg, values[i], intsEnum.advance(i));
|
||||
+ r.getClass().getSimpleName(), values[i + k], next.longs[next.offset + k]);
|
||||
}
|
||||
assertEquals(msg, i, intsEnum.ord());
|
||||
i += next.length;
|
||||
}
|
||||
if (intsEnum.ord() < valueCount - 1)
|
||||
assertEquals(values[valueCount - 1], intsEnum
|
||||
.advance(valueCount - 1));
|
||||
assertEquals(valueCount - 1, intsEnum.ord());
|
||||
assertEquals(fp, in.getFilePointer());
|
||||
in.close();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue