mirror of https://github.com/apache/lucene.git
LUCENE-4161: Make packed int arrays usable by codecs.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1357159 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9250082566
commit
e96b143b6a
|
@ -315,10 +315,10 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
|
||||||
try {
|
try {
|
||||||
// Subsample the index terms
|
// Subsample the index terms
|
||||||
clone1.seek(packedIndexStart);
|
clone1.seek(packedIndexStart);
|
||||||
final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1);
|
final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||||
|
|
||||||
clone2.seek(packedOffsetsStart);
|
clone2.seek(packedOffsetsStart);
|
||||||
final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2);
|
final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||||
|
|
||||||
// TODO: often we can get by w/ fewer bits per
|
// TODO: often we can get by w/ fewer bits per
|
||||||
// value, below.. .but this'd be more complex:
|
// value, below.. .but this'd be more complex:
|
||||||
|
|
|
@ -121,7 +121,7 @@ class VarStraightBytesImpl {
|
||||||
final IndexInput cloneIdx = reader.cloneIndex();
|
final IndexInput cloneIdx = reader.cloneIndex();
|
||||||
try {
|
try {
|
||||||
numDataBytes = cloneIdx.readVLong();
|
numDataBytes = cloneIdx.readVLong();
|
||||||
final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx);
|
final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||||
for (int i = 0; i < maxDocs; i++) {
|
for (int i = 0; i < maxDocs; i++) {
|
||||||
long offset = iter.next();
|
long offset = iter.next();
|
||||||
++lastDocID;
|
++lastDocID;
|
||||||
|
|
|
@ -0,0 +1,167 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Represents long[], as a slice (offset + length) into an
|
||||||
|
* existing long[]. The {@link #longs} member should never be null; use
|
||||||
|
* {@link #EMPTY_LONGS} if necessary.
|
||||||
|
*
|
||||||
|
* @lucene.internal */
|
||||||
|
public final class LongsRef implements Comparable<LongsRef>, Cloneable {
|
||||||
|
|
||||||
|
public static final long[] EMPTY_LONGS = new long[0];
|
||||||
|
|
||||||
|
public long[] longs;
|
||||||
|
public int offset;
|
||||||
|
public int length;
|
||||||
|
|
||||||
|
public LongsRef() {
|
||||||
|
longs = EMPTY_LONGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
public LongsRef(int capacity) {
|
||||||
|
longs = new long[capacity];
|
||||||
|
}
|
||||||
|
|
||||||
|
public LongsRef(long[] longs, int offset, int length) {
|
||||||
|
assert longs != null;
|
||||||
|
assert offset >= 0;
|
||||||
|
assert length >= 0;
|
||||||
|
assert longs.length >= offset + length;
|
||||||
|
this.longs = longs;
|
||||||
|
this.offset = offset;
|
||||||
|
this.length = length;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LongsRef clone() {
|
||||||
|
return new LongsRef(longs, offset, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
final int prime = 31;
|
||||||
|
int result = 0;
|
||||||
|
final long end = offset + length;
|
||||||
|
for(int i = offset; i < end; i++) {
|
||||||
|
result = prime * result + (int) (longs[i] ^ (longs[i]>>>32));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (other == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (other instanceof LongsRef) {
|
||||||
|
return this.longsEquals((LongsRef) other);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean longsEquals(LongsRef other) {
|
||||||
|
if (length == other.length) {
|
||||||
|
int otherUpto = other.offset;
|
||||||
|
final long[] otherInts = other.longs;
|
||||||
|
final long end = offset + length;
|
||||||
|
for(int upto=offset; upto<end; upto++,otherUpto++) {
|
||||||
|
if (longs[upto] != otherInts[otherUpto]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Signed long order comparison */
|
||||||
|
public int compareTo(LongsRef other) {
|
||||||
|
if (this == other) return 0;
|
||||||
|
|
||||||
|
final long[] aInts = this.longs;
|
||||||
|
int aUpto = this.offset;
|
||||||
|
final long[] bInts = other.longs;
|
||||||
|
int bUpto = other.offset;
|
||||||
|
|
||||||
|
final long aStop = aUpto + Math.min(this.length, other.length);
|
||||||
|
|
||||||
|
while(aUpto < aStop) {
|
||||||
|
long aInt = aInts[aUpto++];
|
||||||
|
long bInt = bInts[bUpto++];
|
||||||
|
if (aInt > bInt) {
|
||||||
|
return 1;
|
||||||
|
} else if (aInt < bInt) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// One is a prefix of the other, or, they are equal:
|
||||||
|
return this.length - other.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void copyLongs(LongsRef other) {
|
||||||
|
if (longs.length - offset < other.length) {
|
||||||
|
longs = new long[other.length];
|
||||||
|
offset = 0;
|
||||||
|
}
|
||||||
|
System.arraycopy(other.longs, other.offset, longs, offset, other.length);
|
||||||
|
length = other.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used to grow the reference array.
|
||||||
|
*
|
||||||
|
* In general this should not be used as it does not take the offset into account.
|
||||||
|
* @lucene.internal */
|
||||||
|
public void grow(int newLength) {
|
||||||
|
assert offset == 0;
|
||||||
|
if (longs.length < newLength) {
|
||||||
|
longs = ArrayUtil.grow(longs, newLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append('[');
|
||||||
|
final long end = offset + length;
|
||||||
|
for(int i=offset;i<end;i++) {
|
||||||
|
if (i > offset) {
|
||||||
|
sb.append(' ');
|
||||||
|
}
|
||||||
|
sb.append(Long.toHexString(longs[i]));
|
||||||
|
}
|
||||||
|
sb.append(']');
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new LongsRef that points to a copy of the longs from
|
||||||
|
* <code>other</code>
|
||||||
|
* <p>
|
||||||
|
* The returned LongsRef will have a length of other.length
|
||||||
|
* and an offset of zero.
|
||||||
|
*/
|
||||||
|
public static LongsRef deepCopyOf(LongsRef other) {
|
||||||
|
LongsRef clone = new LongsRef();
|
||||||
|
clone.copyLongs(other);
|
||||||
|
return clone;
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,3 +1,5 @@
|
||||||
|
// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
package org.apache.lucene.util.packed;
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -24,62 +26,37 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Direct wrapping of 16 bit values to a backing array of shorts.
|
* Direct wrapping of 16-bits values to a backing array.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
|
final class Direct16 extends PackedInts.MutableImpl {
|
||||||
|
final short[] values;
|
||||||
|
|
||||||
class Direct16 extends PackedInts.MutableImpl {
|
Direct16(int valueCount) {
|
||||||
private final short[] values;
|
super(valueCount, 16);
|
||||||
private static final int BITS_PER_VALUE = 16;
|
|
||||||
|
|
||||||
public Direct16(int valueCount) {
|
|
||||||
super(valueCount, BITS_PER_VALUE);
|
|
||||||
values = new short[valueCount];
|
values = new short[valueCount];
|
||||||
}
|
}
|
||||||
|
|
||||||
public Direct16(DataInput in, int valueCount) throws IOException {
|
Direct16(DataInput in, int valueCount) throws IOException {
|
||||||
super(valueCount, BITS_PER_VALUE);
|
this(valueCount);
|
||||||
short[] values = new short[valueCount];
|
for (int i = 0; i < valueCount; ++i) {
|
||||||
for(int i=0;i<valueCount;i++) {
|
|
||||||
values[i] = in.readShort();
|
values[i] = in.readShort();
|
||||||
}
|
}
|
||||||
final int mod = valueCount % 4;
|
final int mod = valueCount % 4;
|
||||||
if (mod != 0) {
|
if (mod != 0) {
|
||||||
final int pad = 4-mod;
|
for (int i = mod; i < 4; ++i) {
|
||||||
// round out long
|
|
||||||
for(int i=0;i<pad;i++) {
|
|
||||||
in.readShort();
|
in.readShort();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.values = values;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an array backed by the given values.
|
|
||||||
* </p><p>
|
|
||||||
* Note: The values are used directly, so changes to the values will
|
|
||||||
* affect the structure.
|
|
||||||
* @param values used as the internal backing array.
|
|
||||||
*/
|
|
||||||
public Direct16(short[] values) {
|
|
||||||
super(values.length, BITS_PER_VALUE);
|
|
||||||
this.values = values;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long get(final int index) {
|
|
||||||
assert index >= 0 && index < size();
|
|
||||||
return 0xFFFFL & values[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
public void set(final int index, final long value) {
|
|
||||||
values[index] = (short)(value & 0xFFFF);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void fill(int fromIndex, int toIndex, long val) {
|
public long get(final int index) {
|
||||||
assert (val & 0xffffL) == val;
|
return values[index] & 0xFFFFL;
|
||||||
Arrays.fill(values, fromIndex, toIndex, (short) val);
|
}
|
||||||
|
|
||||||
|
public void set(final int index, final long value) {
|
||||||
|
values[index] = (short) (value);
|
||||||
}
|
}
|
||||||
|
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
|
@ -87,7 +64,7 @@ class Direct16 extends PackedInts.MutableImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
public void clear() {
|
||||||
Arrays.fill(values, (short)0);
|
Arrays.fill(values, (short) 0L);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -99,4 +76,35 @@ class Direct16 extends PackedInts.MutableImpl {
|
||||||
public boolean hasArray() {
|
public boolean hasArray() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int gets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
|
||||||
|
arr[o] = values[i] & 0xFFFFL;
|
||||||
|
}
|
||||||
|
return gets;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int set(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int sets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
|
||||||
|
values[i] = (short) arr[o];
|
||||||
|
}
|
||||||
|
return sets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fill(int fromIndex, int toIndex, long val) {
|
||||||
|
assert val == (val & 0xFFFFL);
|
||||||
|
Arrays.fill(values, fromIndex, toIndex, (short) val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
package org.apache.lucene.util.packed;
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -24,58 +26,37 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Direct wrapping of 32 bit values to a backing array of ints.
|
* Direct wrapping of 32-bits values to a backing array.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
|
final class Direct32 extends PackedInts.MutableImpl {
|
||||||
|
final int[] values;
|
||||||
|
|
||||||
class Direct32 extends PackedInts.MutableImpl {
|
Direct32(int valueCount) {
|
||||||
private final int[] values;
|
super(valueCount, 32);
|
||||||
private static final int BITS_PER_VALUE = 32;
|
|
||||||
|
|
||||||
public Direct32(int valueCount) {
|
|
||||||
super(valueCount, BITS_PER_VALUE);
|
|
||||||
values = new int[valueCount];
|
values = new int[valueCount];
|
||||||
}
|
}
|
||||||
|
|
||||||
public Direct32(DataInput in, int valueCount) throws IOException {
|
Direct32(DataInput in, int valueCount) throws IOException {
|
||||||
super(valueCount, BITS_PER_VALUE);
|
this(valueCount);
|
||||||
int[] values = new int[valueCount];
|
for (int i = 0; i < valueCount; ++i) {
|
||||||
for(int i=0;i<valueCount;i++) {
|
|
||||||
values[i] = in.readInt();
|
values[i] = in.readInt();
|
||||||
}
|
}
|
||||||
final int mod = valueCount % 2;
|
final int mod = valueCount % 2;
|
||||||
if (mod != 0) {
|
if (mod != 0) {
|
||||||
in.readInt();
|
for (int i = mod; i < 2; ++i) {
|
||||||
|
in.readInt();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.values = values;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an array backed by the given values.
|
|
||||||
* </p><p>
|
|
||||||
* Note: The values are used directly, so changes to the given values will
|
|
||||||
* affect the structure.
|
|
||||||
* @param values used as the internal backing array.
|
|
||||||
*/
|
|
||||||
public Direct32(int[] values) {
|
|
||||||
super(values.length, BITS_PER_VALUE);
|
|
||||||
this.values = values;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long get(final int index) {
|
|
||||||
assert index >= 0 && index < size();
|
|
||||||
return 0xFFFFFFFFL & values[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
public void set(final int index, final long value) {
|
|
||||||
values[index] = (int)(value & 0xFFFFFFFF);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void fill(int fromIndex, int toIndex, long val) {
|
public long get(final int index) {
|
||||||
assert (val & 0xffffffffL) == val;
|
return values[index] & 0xFFFFFFFFL;
|
||||||
Arrays.fill(values, fromIndex, toIndex, (int) val);
|
}
|
||||||
|
|
||||||
|
public void set(final int index, final long value) {
|
||||||
|
values[index] = (int) (value);
|
||||||
}
|
}
|
||||||
|
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
|
@ -83,11 +64,11 @@ class Direct32 extends PackedInts.MutableImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
public void clear() {
|
||||||
Arrays.fill(values, 0);
|
Arrays.fill(values, (int) 0L);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int[] getArray() {
|
public Object getArray() {
|
||||||
return values;
|
return values;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,4 +76,35 @@ class Direct32 extends PackedInts.MutableImpl {
|
||||||
public boolean hasArray() {
|
public boolean hasArray() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int gets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
|
||||||
|
arr[o] = values[i] & 0xFFFFFFFFL;
|
||||||
|
}
|
||||||
|
return gets;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int set(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int sets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
|
||||||
|
values[i] = (int) arr[o];
|
||||||
|
}
|
||||||
|
return sets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fill(int fromIndex, int toIndex, long val) {
|
||||||
|
assert val == (val & 0xFFFFFFFFL);
|
||||||
|
Arrays.fill(values, fromIndex, toIndex, (int) val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
package org.apache.lucene.util.packed;
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -24,70 +26,31 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Direct wrapping of 64 bit values to a backing array of longs.
|
* Direct wrapping of 64-bits values to a backing array.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
class Direct64 extends PackedInts.MutableImpl {
|
final class Direct64 extends PackedInts.MutableImpl {
|
||||||
private final long[] values;
|
final long[] values;
|
||||||
private static final int BITS_PER_VALUE = 64;
|
|
||||||
|
|
||||||
public Direct64(int valueCount) {
|
Direct64(int valueCount) {
|
||||||
super(valueCount, BITS_PER_VALUE);
|
super(valueCount, 64);
|
||||||
values = new long[valueCount];
|
values = new long[valueCount];
|
||||||
}
|
}
|
||||||
|
|
||||||
public Direct64(DataInput in, int valueCount) throws IOException {
|
Direct64(DataInput in, int valueCount) throws IOException {
|
||||||
super(valueCount, BITS_PER_VALUE);
|
this(valueCount);
|
||||||
long[] values = new long[valueCount];
|
for (int i = 0; i < valueCount; ++i) {
|
||||||
for(int i=0;i<valueCount;i++) {
|
|
||||||
values[i] = in.readLong();
|
values[i] = in.readLong();
|
||||||
}
|
}
|
||||||
|
|
||||||
this.values = values;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an array backed by the given values.
|
|
||||||
* </p><p>
|
|
||||||
* Note: The values are used directly, so changes to the given values will
|
|
||||||
* affect the structure.
|
|
||||||
* @param values used as the internal backing array.
|
|
||||||
*/
|
|
||||||
public Direct64(long[] values) {
|
|
||||||
super(values.length, BITS_PER_VALUE);
|
|
||||||
this.values = values;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public long get(final int index) {
|
public long get(final int index) {
|
||||||
assert index >= 0 && index < size();
|
|
||||||
return values[index];
|
return values[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public int get(int index, long[] arr, int off, int len) {
|
|
||||||
assert index >= 0 && index < valueCount;
|
|
||||||
assert off + len <= arr.length;
|
|
||||||
final int gets = Math.min(valueCount - index, len);
|
|
||||||
System.arraycopy(values, index, arr, off, gets);
|
|
||||||
return gets;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void set(final int index, final long value) {
|
public void set(final int index, final long value) {
|
||||||
values[index] = value;
|
values[index] = (value);
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int set(int index, long[] arr, int off, int len) {
|
|
||||||
assert index >= 0 && index < valueCount;
|
|
||||||
assert off + len <= arr.length;
|
|
||||||
final int sets = Math.min(valueCount - index, len);
|
|
||||||
System.arraycopy(arr, off, values, index, sets);
|
|
||||||
return sets;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void fill(int fromIndex, int toIndex, long val) {
|
|
||||||
Arrays.fill(values, fromIndex, toIndex, val);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
|
@ -99,7 +62,7 @@ class Direct64 extends PackedInts.MutableImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long[] getArray() {
|
public Object getArray() {
|
||||||
return values;
|
return values;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,4 +71,29 @@ class Direct64 extends PackedInts.MutableImpl {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int gets = Math.min(valueCount - index, len);
|
||||||
|
System.arraycopy(values, index, arr, off, gets);
|
||||||
|
return gets;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int set(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int sets = Math.min(valueCount - index, len);
|
||||||
|
System.arraycopy(arr, off, values, index, sets);
|
||||||
|
return sets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fill(int fromIndex, int toIndex, long val) {
|
||||||
|
Arrays.fill(values, fromIndex, toIndex, val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
package org.apache.lucene.util.packed;
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -24,63 +26,37 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Direct wrapping of 8 bit values to a backing array of bytes.
|
* Direct wrapping of 8-bits values to a backing array.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
|
final class Direct8 extends PackedInts.MutableImpl {
|
||||||
|
final byte[] values;
|
||||||
|
|
||||||
class Direct8 extends PackedInts.MutableImpl {
|
Direct8(int valueCount) {
|
||||||
private final byte[] values;
|
super(valueCount, 8);
|
||||||
private static final int BITS_PER_VALUE = 8;
|
|
||||||
|
|
||||||
public Direct8(int valueCount) {
|
|
||||||
super(valueCount, BITS_PER_VALUE);
|
|
||||||
values = new byte[valueCount];
|
values = new byte[valueCount];
|
||||||
}
|
}
|
||||||
|
|
||||||
public Direct8(DataInput in, int valueCount)
|
Direct8(DataInput in, int valueCount) throws IOException {
|
||||||
throws IOException {
|
this(valueCount);
|
||||||
super(valueCount, BITS_PER_VALUE);
|
for (int i = 0; i < valueCount; ++i) {
|
||||||
byte[] values = new byte[valueCount];
|
|
||||||
for(int i=0;i<valueCount;i++) {
|
|
||||||
values[i] = in.readByte();
|
values[i] = in.readByte();
|
||||||
}
|
}
|
||||||
final int mod = valueCount % 8;
|
final int mod = valueCount % 8;
|
||||||
if (mod != 0) {
|
if (mod != 0) {
|
||||||
final int pad = 8-mod;
|
for (int i = mod; i < 8; ++i) {
|
||||||
// round out long
|
|
||||||
for(int i=0;i<pad;i++) {
|
|
||||||
in.readByte();
|
in.readByte();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.values = values;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an array backed by the given values.
|
|
||||||
* </p><p>
|
|
||||||
* Note: The values are used directly, so changes to the given values will
|
|
||||||
* affect the structure.
|
|
||||||
* @param values used as the internal backing array.
|
|
||||||
*/
|
|
||||||
public Direct8(byte[] values) {
|
|
||||||
super(values.length, BITS_PER_VALUE);
|
|
||||||
this.values = values;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long get(final int index) {
|
|
||||||
assert index >= 0 && index < size();
|
|
||||||
return 0xFFL & values[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
public void set(final int index, final long value) {
|
|
||||||
values[index] = (byte)(value & 0xFF);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void fill(int fromIndex, int toIndex, long val) {
|
public long get(final int index) {
|
||||||
assert (val & 0xffL) == val;
|
return values[index] & 0xFFL;
|
||||||
Arrays.fill(values, fromIndex, toIndex, (byte) val);
|
}
|
||||||
|
|
||||||
|
public void set(final int index, final long value) {
|
||||||
|
values[index] = (byte) (value);
|
||||||
}
|
}
|
||||||
|
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
|
@ -88,7 +64,7 @@ class Direct8 extends PackedInts.MutableImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
public void clear() {
|
||||||
Arrays.fill(values, (byte)0);
|
Arrays.fill(values, (byte) 0L);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -100,4 +76,35 @@ class Direct8 extends PackedInts.MutableImpl {
|
||||||
public boolean hasArray() {
|
public boolean hasArray() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int gets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
|
||||||
|
arr[o] = values[i] & 0xFFL;
|
||||||
|
}
|
||||||
|
return gets;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int set(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int sets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
|
||||||
|
values[i] = (byte) arr[o];
|
||||||
|
}
|
||||||
|
return sets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fill(int fromIndex, int toIndex, long val) {
|
||||||
|
assert val == (val & 0xFFL);
|
||||||
|
Arrays.fill(values, fromIndex, toIndex, (byte) val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,7 @@
|
||||||
|
// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
package org.apache.lucene.util.packed;
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.lucene.store.DataInput;
|
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -23,32 +19,38 @@ import org.apache.lucene.util.RamUsageEstimator;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** 48 bitsPerValue backed by short[] */
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Packs integers into 3 shorts (48 bits per value).
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
|
final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
|
||||||
|
final short[] blocks;
|
||||||
|
|
||||||
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
||||||
|
|
||||||
private final short[] blocks;
|
|
||||||
|
|
||||||
Packed16ThreeBlocks(int valueCount) {
|
Packed16ThreeBlocks(int valueCount) {
|
||||||
super(valueCount, 48);
|
super(valueCount, 48);
|
||||||
if (valueCount > MAX_SIZE) {
|
if (valueCount > MAX_SIZE) {
|
||||||
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
|
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
|
||||||
}
|
}
|
||||||
this.blocks = new short[3 * valueCount];
|
blocks = new short[valueCount * 3];
|
||||||
}
|
}
|
||||||
|
|
||||||
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
|
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
|
||||||
this(valueCount);
|
this(valueCount);
|
||||||
for (int i = 0; i < blocks.length; i++) {
|
for (int i = 0; i < 3 * valueCount; ++i) {
|
||||||
blocks[i] = in.readShort();
|
blocks[i] = in.readShort();
|
||||||
}
|
}
|
||||||
final int mod = blocks.length % 4;
|
final int mod = blocks.length % 4;
|
||||||
if (mod != 0) {
|
if (mod != 0) {
|
||||||
final int pad = 4 - mod;
|
for (int i = mod; i < 4; ++i) {
|
||||||
// round out long
|
in.readShort();
|
||||||
for (int i = 0; i < pad; i++) {
|
|
||||||
in.readShort();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -56,26 +58,55 @@ final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
|
||||||
@Override
|
@Override
|
||||||
public long get(int index) {
|
public long get(int index) {
|
||||||
final int o = index * 3;
|
final int o = index * 3;
|
||||||
return (blocks[o] & 0xffffL) << 32 | (blocks[o+1] & 0xffffL) << 16 | (blocks[o+2] & 0xffffL);
|
return (blocks[o] & 0xFFFFL) << 32 | (blocks[o+1] & 0xFFFFL) << 16 | (blocks[o+2] & 0xFFFFL);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int gets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
|
||||||
|
arr[off++] = (blocks[i] & 0xFFFFL) << 32 | (blocks[i+1] & 0xFFFFL) << 16 | (blocks[i+2] & 0xFFFFL);
|
||||||
|
}
|
||||||
|
return gets;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void set(int index, long value) {
|
public void set(int index, long value) {
|
||||||
final int o = index * 3;
|
final int o = index * 3;
|
||||||
blocks[o] = (short) (value >> 32);
|
blocks[o] = (short) (value >>> 32);
|
||||||
blocks[o+1] = (short) (value >> 16);
|
blocks[o+1] = (short) (value >>> 16);
|
||||||
blocks[o+2] = (short) value;
|
blocks[o+2] = (short) value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int set(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int sets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
|
||||||
|
final long value = arr[i];
|
||||||
|
blocks[o++] = (short) (value >>> 32);
|
||||||
|
blocks[o++] = (short) (value >>> 16);
|
||||||
|
blocks[o++] = (short) value;
|
||||||
|
}
|
||||||
|
return sets;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void fill(int fromIndex, int toIndex, long val) {
|
public void fill(int fromIndex, int toIndex, long val) {
|
||||||
short block1 = (short) (val >> 32);
|
final short block1 = (short) (val >>> 32);
|
||||||
short block2 = (short) (val >> 16);
|
final short block2 = (short) (val >>> 16);
|
||||||
short block3 = (short) val;
|
final short block3 = (short) val;
|
||||||
for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) {
|
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
|
||||||
blocks[i++] = block1;
|
blocks[i] = block1;
|
||||||
blocks[i++] = block2;
|
blocks[i+1] = block2;
|
||||||
blocks[i++] = block3;
|
blocks[i+2] = block3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -138,6 +138,49 @@ class Packed64 extends PackedInts.MutableImpl {
|
||||||
& maskRight;
|
& maskRight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
len = Math.min(len, valueCount - index);
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int originalIndex = index;
|
||||||
|
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
|
||||||
|
|
||||||
|
// go to the next block where the value does not span across two blocks
|
||||||
|
final int offsetInBlocks = index % op.values();
|
||||||
|
if (offsetInBlocks != 0) {
|
||||||
|
for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
|
||||||
|
arr[off++] = get(index++);
|
||||||
|
--len;
|
||||||
|
}
|
||||||
|
if (len == 0) {
|
||||||
|
return index - originalIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// bulk get
|
||||||
|
assert index % op.values() == 0;
|
||||||
|
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
|
||||||
|
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
|
||||||
|
final int iterations = len / op.values();
|
||||||
|
op.get(blocks, blockIndex, arr, off, iterations);
|
||||||
|
final int gotValues = iterations * op.values();
|
||||||
|
index += gotValues;
|
||||||
|
len -= gotValues;
|
||||||
|
assert len >= 0;
|
||||||
|
|
||||||
|
if (index > originalIndex) {
|
||||||
|
// stay at the block boundary
|
||||||
|
return index - originalIndex;
|
||||||
|
} else {
|
||||||
|
// no progress so far => already at a block boundary but no full block to get
|
||||||
|
assert index == originalIndex;
|
||||||
|
return super.get(index, arr, off, len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void set(final int index, final long value) {
|
public void set(final int index, final long value) {
|
||||||
// The abstract index in a contiguous bit stream
|
// The abstract index in a contiguous bit stream
|
||||||
|
@ -159,6 +202,48 @@ class Packed64 extends PackedInts.MutableImpl {
|
||||||
| (value << (BLOCK_SIZE - endBits));
|
| (value << (BLOCK_SIZE - endBits));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int set(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
len = Math.min(len, valueCount - index);
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int originalIndex = index;
|
||||||
|
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
|
||||||
|
|
||||||
|
// go to the next block where the value does not span across two blocks
|
||||||
|
final int offsetInBlocks = index % op.values();
|
||||||
|
if (offsetInBlocks != 0) {
|
||||||
|
for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
|
||||||
|
set(index++, arr[off++]);
|
||||||
|
--len;
|
||||||
|
}
|
||||||
|
if (len == 0) {
|
||||||
|
return index - originalIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// bulk get
|
||||||
|
assert index % op.values() == 0;
|
||||||
|
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
|
||||||
|
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
|
||||||
|
final int iterations = len / op.values();
|
||||||
|
op.set(blocks, blockIndex, arr, off, iterations);
|
||||||
|
final int setValues = iterations * op.values();
|
||||||
|
index += setValues;
|
||||||
|
len -= setValues;
|
||||||
|
assert len >= 0;
|
||||||
|
|
||||||
|
if (index > originalIndex) {
|
||||||
|
// stay at the block boundary
|
||||||
|
return index - originalIndex;
|
||||||
|
} else {
|
||||||
|
// no progress so far => already at a block boundary but no full block to get
|
||||||
|
assert index == originalIndex;
|
||||||
|
return super.set(index, arr, off, len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
|
@ -39,12 +39,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
|
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static float overheadPerValue(int bitsPerValue) {
|
|
||||||
int valuesPerBlock = 64 / bitsPerValue;
|
|
||||||
int overhead = 64 % bitsPerValue;
|
|
||||||
return (float) overhead / valuesPerBlock;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
|
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
|
||||||
return valueCount / valuesPerBlock
|
return valueCount / valuesPerBlock
|
||||||
+ (valueCount % valuesPerBlock == 0 ? 0 : 1);
|
+ (valueCount % valuesPerBlock == 0 ? 0 : 1);
|
||||||
|
@ -92,16 +86,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
|
|
||||||
// bulk get
|
// bulk get
|
||||||
assert index % valuesPerBlock == 0;
|
assert index % valuesPerBlock == 0;
|
||||||
final long readMask = (1L << bitsPerValue) - 1;
|
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||||
final int startBlock = index / valuesPerBlock;
|
assert op.blocks() == 1;
|
||||||
final int endBlock = (index + len) / valuesPerBlock;
|
assert op.values() == valuesPerBlock;
|
||||||
final int diff = (endBlock - startBlock) * valuesPerBlock;
|
final int blockIndex = index / valuesPerBlock;
|
||||||
|
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||||
|
op.get(blocks, blockIndex, arr, off, nblocks);
|
||||||
|
final int diff = nblocks * valuesPerBlock;
|
||||||
index += diff; len -= diff;
|
index += diff; len -= diff;
|
||||||
for (int block = startBlock; block < endBlock; ++block) {
|
|
||||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
|
||||||
arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (index > originalIndex) {
|
if (index > originalIndex) {
|
||||||
// stay at the block boundary
|
// stay at the block boundary
|
||||||
|
@ -138,17 +130,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
|
|
||||||
// bulk set
|
// bulk set
|
||||||
assert index % valuesPerBlock == 0;
|
assert index % valuesPerBlock == 0;
|
||||||
final int startBlock = index / valuesPerBlock;
|
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||||
final int endBlock = (index + len) / valuesPerBlock;
|
assert op.blocks() == 1;
|
||||||
final int diff = (endBlock - startBlock) * valuesPerBlock;
|
assert op.values() == valuesPerBlock;
|
||||||
|
final int blockIndex = index / valuesPerBlock;
|
||||||
|
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||||
|
op.set(blocks, blockIndex, arr, off, nblocks);
|
||||||
|
final int diff = nblocks * valuesPerBlock;
|
||||||
index += diff; len -= diff;
|
index += diff; len -= diff;
|
||||||
for (int block = startBlock; block < endBlock; ++block) {
|
|
||||||
long next = 0L;
|
|
||||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
|
||||||
next |= (arr[off++] << (i * bitsPerValue));
|
|
||||||
}
|
|
||||||
blocks[block] = next;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (index > originalIndex) {
|
if (index > originalIndex) {
|
||||||
// stay at the block boundary
|
// stay at the block boundary
|
||||||
|
@ -202,8 +191,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int getFormat() {
|
protected PackedInts.Format getFormat() {
|
||||||
return PackedInts.PACKED_SINGLE_BLOCK;
|
return PackedInts.Format.PACKED_SINGLE_BLOCK;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,87 +0,0 @@
|
||||||
package org.apache.lucene.util.packed;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
import org.apache.lucene.util.packed.PackedInts.ReaderIteratorImpl;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
final class Packed64SingleBlockReaderIterator extends ReaderIteratorImpl {
|
|
||||||
|
|
||||||
private long pending;
|
|
||||||
private int shift;
|
|
||||||
private final long mask;
|
|
||||||
private int position;
|
|
||||||
|
|
||||||
Packed64SingleBlockReaderIterator(int valueCount, int bitsPerValue, IndexInput in) {
|
|
||||||
super(valueCount, bitsPerValue, in);
|
|
||||||
pending = 0;
|
|
||||||
shift = 64;
|
|
||||||
mask = ~(~0L << bitsPerValue);
|
|
||||||
position = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long next() throws IOException {
|
|
||||||
if (shift + bitsPerValue > 64) {
|
|
||||||
pending = in.readLong();
|
|
||||||
shift = 0;
|
|
||||||
}
|
|
||||||
final long next = (pending >>> shift) & mask;
|
|
||||||
shift += bitsPerValue;
|
|
||||||
++position;
|
|
||||||
return next;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int ord() {
|
|
||||||
return position;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long advance(int ord) throws IOException {
|
|
||||||
assert ord < valueCount : "ord must be less than valueCount";
|
|
||||||
assert ord > position : "ord must be greater than the current position";
|
|
||||||
|
|
||||||
final int valuesPerBlock = 64 / bitsPerValue;
|
|
||||||
final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
|
|
||||||
final long targetBlock = ord / valuesPerBlock;
|
|
||||||
final long blocksToSkip = targetBlock - nextBlock;
|
|
||||||
if (blocksToSkip > 0) {
|
|
||||||
final long skip = blocksToSkip << 3;
|
|
||||||
final long filePointer = in.getFilePointer();
|
|
||||||
|
|
||||||
in.seek(filePointer + skip);
|
|
||||||
shift = 64;
|
|
||||||
|
|
||||||
final int offsetInBlock = ord % valuesPerBlock;
|
|
||||||
for (int i = 0; i < offsetInBlock; ++i) {
|
|
||||||
next();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = position; i < ord - 1; ++i) {
|
|
||||||
next();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
position = ord - 1;
|
|
||||||
return next();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,81 +0,0 @@
|
||||||
package org.apache.lucene.util.packed;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.store.DataOutput;
|
|
||||||
import org.apache.lucene.util.packed.PackedInts.Writer;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A {@link Writer} for {@link Packed64SingleBlock} readers.
|
|
||||||
*/
|
|
||||||
final class Packed64SingleBlockWriter extends Writer {
|
|
||||||
|
|
||||||
private long pending;
|
|
||||||
private int shift;
|
|
||||||
private int written;
|
|
||||||
|
|
||||||
Packed64SingleBlockWriter(DataOutput out, int valueCount,
|
|
||||||
int bitsPerValue) throws IOException {
|
|
||||||
super(out, valueCount, bitsPerValue);
|
|
||||||
assert Packed64SingleBlock.isSupported(bitsPerValue) : bitsPerValue + " is not supported";
|
|
||||||
pending = 0;
|
|
||||||
shift = 0;
|
|
||||||
written = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected int getFormat() {
|
|
||||||
return PackedInts.PACKED_SINGLE_BLOCK;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void add(long v) throws IOException {
|
|
||||||
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
|
|
||||||
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
|
|
||||||
assert v >= 0;
|
|
||||||
|
|
||||||
if (shift + bitsPerValue > Long.SIZE) {
|
|
||||||
out.writeLong(pending);
|
|
||||||
pending = 0;
|
|
||||||
shift = 0;
|
|
||||||
}
|
|
||||||
pending |= v << shift;
|
|
||||||
shift += bitsPerValue;
|
|
||||||
++written;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void finish() throws IOException {
|
|
||||||
while (written < valueCount) {
|
|
||||||
add(0L); // Auto flush
|
|
||||||
}
|
|
||||||
|
|
||||||
if (shift > 0) {
|
|
||||||
// add was called at least once
|
|
||||||
out.writeLong(pending);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "Packed64SingleBlockWriter(written " + written + "/" + valueCount + " with "
|
|
||||||
+ bitsPerValue + " bits/value)";
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,11 +1,7 @@
|
||||||
|
// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
package org.apache.lucene.util.packed;
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.lucene.store.DataInput;
|
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -23,32 +19,38 @@ import org.apache.lucene.util.RamUsageEstimator;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** 24 bitsPerValue backed by byte[] */
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Packs integers into 3 bytes (24 bits per value).
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
|
final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
|
||||||
|
final byte[] blocks;
|
||||||
|
|
||||||
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
|
||||||
|
|
||||||
private final byte[] blocks;
|
|
||||||
|
|
||||||
/**
 * Creates an array able to hold <code>valueCount</code> 24-bit values,
 * backed by three bytes per value.
 *
 * @throws ArrayIndexOutOfBoundsException if <code>valueCount</code> exceeds {@link #MAX_SIZE}
 */
Packed8ThreeBlocks(int valueCount) {
  super(valueCount, 24);
  if (valueCount > MAX_SIZE) {
    throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
  }
  this.blocks = new byte[3 * valueCount];
}
|
||||||
|
|
||||||
/**
 * Creates an array of <code>valueCount</code> values and fills it by reading
 * three bytes per value from <code>in</code>. If the number of bytes read is
 * not a multiple of 8, the remaining bytes up to the next multiple of 8 are
 * read and discarded.
 */
Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
  this(valueCount);
  final int total = 3 * valueCount;
  for (int pos = 0; pos < total; ++pos) {
    blocks[pos] = in.readByte();
  }
  // consume the trailing padding so that a multiple of 8 bytes has been read
  final int trailing = blocks.length % 8;
  if (trailing != 0) {
    int padding = 8 - trailing;
    while (padding > 0) {
      in.readByte();
      --padding;
    }
  }
}
|
||||||
|
@ -56,26 +58,55 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
|
||||||
@Override
|
@Override
|
||||||
public long get(int index) {
|
public long get(int index) {
|
||||||
final int o = index * 3;
|
final int o = index * 3;
|
||||||
return (blocks[o] & 0xffL) << 16 | (blocks[o+1] & 0xffL) << 8 | (blocks[o+2] & 0xffL);
|
return (blocks[o] & 0xFFL) << 16 | (blocks[o+1] & 0xFFL) << 8 | (blocks[o+2] & 0xFFL);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int gets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
|
||||||
|
arr[off++] = (blocks[i] & 0xFFL) << 16 | (blocks[i+1] & 0xFFL) << 8 | (blocks[i+2] & 0xFFL);
|
||||||
|
}
|
||||||
|
return gets;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void set(int index, long value) {
|
public void set(int index, long value) {
|
||||||
final int o = index * 3;
|
final int o = index * 3;
|
||||||
|
blocks[o] = (byte) (value >>> 16);
|
||||||
|
blocks[o+1] = (byte) (value >>> 8);
|
||||||
blocks[o+2] = (byte) value;
|
blocks[o+2] = (byte) value;
|
||||||
blocks[o+1] = (byte) (value >> 8);
|
}
|
||||||
blocks[o] = (byte) (value >> 16);
|
|
||||||
|
@Override
|
||||||
|
public int set(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int sets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
|
||||||
|
final long value = arr[i];
|
||||||
|
blocks[o++] = (byte) (value >>> 16);
|
||||||
|
blocks[o++] = (byte) (value >>> 8);
|
||||||
|
blocks[o++] = (byte) value;
|
||||||
|
}
|
||||||
|
return sets;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void fill(int fromIndex, int toIndex, long val) {
|
public void fill(int fromIndex, int toIndex, long val) {
|
||||||
byte block1 = (byte) (val >> 16);
|
final byte block1 = (byte) (val >>> 16);
|
||||||
byte block2 = (byte) (val >> 8);
|
final byte block2 = (byte) (val >>> 8);
|
||||||
byte block3 = (byte) val;
|
final byte block3 = (byte) val;
|
||||||
for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) {
|
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
|
||||||
blocks[i++] = block1;
|
blocks[i] = block1;
|
||||||
blocks[i++] = block2;
|
blocks[i+1] = block2;
|
||||||
blocks[i++] = block3;
|
blocks[i+2] = block3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,5 +124,4 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
|
||||||
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||||
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.store.DataInput;
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.LongsRef;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
@ -34,7 +35,6 @@ import java.io.IOException;
|
||||||
*
|
*
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class PackedInts {
|
public class PackedInts {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -62,12 +62,184 @@ public class PackedInts {
|
||||||
*/
|
*/
|
||||||
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
|
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
|
||||||
|
|
||||||
final static String CODEC_NAME = "PackedInts";
|
public final static String CODEC_NAME = "PackedInts";
|
||||||
final static int VERSION_START = 0;
|
public final static int VERSION_START = 0;
|
||||||
final static int VERSION_CURRENT = VERSION_START;
|
public final static int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
static final int PACKED = 0;
|
/**
|
||||||
static final int PACKED_SINGLE_BLOCK = 1;
|
* A format to write packed ints.
|
||||||
|
*
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public enum Format {
|
||||||
|
/**
|
||||||
|
* Compact format, all bits are written contiguously.
|
||||||
|
*/
|
||||||
|
PACKED(0) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nblocks(int bitsPerValue, int values) {
|
||||||
|
return (int) Math.ceil((double) values * bitsPerValue / 64);
|
||||||
|
}
|
||||||
|
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A format that may insert padding bits to improve encoding and decoding
|
||||||
|
* speed. Since this format doesn't support all possible bits per value, you
|
||||||
|
* should never use it directly, but rather use
|
||||||
|
* {@link PackedInts#fastestFormatAndBits(int, int, float)} to find the
|
||||||
|
* format that best suits your needs.
|
||||||
|
*/
|
||||||
|
PACKED_SINGLE_BLOCK(1) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nblocks(int bitsPerValue, int values) {
|
||||||
|
final int valuesPerBlock = 64 / bitsPerValue;
|
||||||
|
return (int) Math.ceil((double) values / valuesPerBlock);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isSupported(int bitsPerValue) {
|
||||||
|
return Packed64SingleBlock.isSupported(bitsPerValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float overheadPerValue(int bitsPerValue) {
|
||||||
|
assert isSupported(bitsPerValue);
|
||||||
|
final int valuesPerBlock = 64 / bitsPerValue;
|
||||||
|
final int overhead = 64 % bitsPerValue;
|
||||||
|
return (float) overhead / valuesPerBlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a format according to its ID.
|
||||||
|
*/
|
||||||
|
public static Format byId(int id) {
|
||||||
|
for (Format format : Format.values()) {
|
||||||
|
if (format.getId() == id) {
|
||||||
|
return format;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("Unknown format id: " + id);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Format(int id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int id;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the ID of the format.
|
||||||
|
*/
|
||||||
|
public int getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes how many blocks are needed to store <code>values</code> values
|
||||||
|
* of size <code>bitsPerValue</code>.
|
||||||
|
*/
|
||||||
|
public abstract int nblocks(int bitsPerValue, int values);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests whether the provided number of bits per value is supported by the
|
||||||
|
* format.
|
||||||
|
*/
|
||||||
|
public boolean isSupported(int bitsPerValue) {
|
||||||
|
return bitsPerValue >= 1 && bitsPerValue <= 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the overhead per value, in bits.
|
||||||
|
*/
|
||||||
|
public float overheadPerValue(int bitsPerValue) {
|
||||||
|
assert isSupported(bitsPerValue);
|
||||||
|
return 0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the overhead ratio (<code>overhead per value / bits per value</code>).
|
||||||
|
*/
|
||||||
|
public final float overheadRatio(int bitsPerValue) {
|
||||||
|
assert isSupported(bitsPerValue);
|
||||||
|
return overheadPerValue(bitsPerValue) / bitsPerValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple class that holds a format and a number of bits per value.
|
||||||
|
*/
|
||||||
|
public static class FormatAndBits {
|
||||||
|
public final Format format;
|
||||||
|
public final int bitsPerValue;
|
||||||
|
public FormatAndBits(Format format, int bitsPerValue) {
|
||||||
|
this.format = format;
|
||||||
|
this.bitsPerValue = bitsPerValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try to find the {@link Format} and number of bits per value that would
|
||||||
|
* restore from disk the fastest reader whose overhead is less than
|
||||||
|
* <code>acceptableOverheadRatio</code>.
|
||||||
|
* </p><p>
|
||||||
|
* The <code>acceptableOverheadRatio</code> parameter makes sense for
|
||||||
|
* random-access {@link Reader}s. In case you only plan to perform
|
||||||
|
* sequential access on this stream later on, you should probably use
|
||||||
|
* {@link PackedInts#COMPACT}.
|
||||||
|
* </p><p>
|
||||||
|
* If you don't know how many values you are going to write, use
|
||||||
|
* <code>valueCount = -1</code>.
|
||||||
|
*/
|
||||||
|
public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
|
||||||
|
if (valueCount == -1) {
|
||||||
|
valueCount = Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
||||||
|
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
||||||
|
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
||||||
|
|
||||||
|
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||||
|
|
||||||
|
int actualBitsPerValue = -1;
|
||||||
|
Format format = Format.PACKED;
|
||||||
|
|
||||||
|
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||||
|
actualBitsPerValue = 8;
|
||||||
|
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||||
|
actualBitsPerValue = 16;
|
||||||
|
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||||
|
actualBitsPerValue = 32;
|
||||||
|
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||||
|
actualBitsPerValue = 64;
|
||||||
|
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||||
|
actualBitsPerValue = 24;
|
||||||
|
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||||
|
actualBitsPerValue = 48;
|
||||||
|
} else {
|
||||||
|
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||||
|
if (Format.PACKED_SINGLE_BLOCK.isSupported(bpv)) {
|
||||||
|
float overhead = Format.PACKED_SINGLE_BLOCK.overheadPerValue(bpv);
|
||||||
|
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||||
|
if (overhead <= acceptableOverhead) {
|
||||||
|
actualBitsPerValue = bpv;
|
||||||
|
format = Format.PACKED_SINGLE_BLOCK;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (actualBitsPerValue < 0) {
|
||||||
|
actualBitsPerValue = bitsPerValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new FormatAndBits(format, actualBitsPerValue);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A read-only random access array of positive integers.
|
* A read-only random access array of positive integers.
|
||||||
|
@ -132,30 +304,39 @@ public class PackedInts {
|
||||||
public static interface ReaderIterator extends Closeable {
|
public static interface ReaderIterator extends Closeable {
|
||||||
/** Returns next value */
|
/** Returns next value */
|
||||||
long next() throws IOException;
|
long next() throws IOException;
|
||||||
|
/** Returns at least 1 and at most <code>count</code> next values,
|
||||||
|
* the returned ref MUST NOT be modified */
|
||||||
|
LongsRef next(int count) throws IOException;
|
||||||
/** Returns number of bits per value */
|
/** Returns number of bits per value */
|
||||||
int getBitsPerValue();
|
int getBitsPerValue();
|
||||||
/** Returns number of values */
|
/** Returns number of values */
|
||||||
int size();
|
int size();
|
||||||
/** Returns the current position */
|
/** Returns the current position */
|
||||||
int ord();
|
int ord();
|
||||||
/** Skips to the given ordinal and returns its value.
|
|
||||||
* @return the value at the given position
|
|
||||||
* @throws IOException if reading the value throws an IOException*/
|
|
||||||
long advance(int ord) throws IOException;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static abstract class ReaderIteratorImpl implements ReaderIterator {
|
static abstract class ReaderIteratorImpl implements ReaderIterator {
|
||||||
|
|
||||||
protected final IndexInput in;
|
protected final DataInput in;
|
||||||
protected final int bitsPerValue;
|
protected final int bitsPerValue;
|
||||||
protected final int valueCount;
|
protected final int valueCount;
|
||||||
|
|
||||||
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) {
|
protected ReaderIteratorImpl(int valueCount, int bitsPerValue, DataInput in) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.bitsPerValue = bitsPerValue;
|
this.bitsPerValue = bitsPerValue;
|
||||||
this.valueCount = valueCount;
|
this.valueCount = valueCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long next() throws IOException {
|
||||||
|
LongsRef nextValues = next(1);
|
||||||
|
assert nextValues.length > 0;
|
||||||
|
final long result = nextValues.longs[nextValues.offset];
|
||||||
|
++nextValues.offset;
|
||||||
|
--nextValues.length;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getBitsPerValue() {
|
public int getBitsPerValue() {
|
||||||
return bitsPerValue;
|
return bitsPerValue;
|
||||||
|
@ -168,7 +349,9 @@ public class PackedInts {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
in.close();
|
if (in instanceof Closeable) {
|
||||||
|
((Closeable) in).close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -217,7 +400,7 @@ public class PackedInts {
|
||||||
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
|
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static abstract class ReaderImpl implements Reader {
|
static abstract class ReaderImpl implements Reader {
|
||||||
protected final int bitsPerValue;
|
protected final int bitsPerValue;
|
||||||
protected final int valueCount;
|
protected final int valueCount;
|
||||||
|
|
||||||
|
@ -257,7 +440,7 @@ public class PackedInts {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static abstract class MutableImpl extends ReaderImpl implements Mutable {
|
static abstract class MutableImpl extends ReaderImpl implements Mutable {
|
||||||
|
|
||||||
protected MutableImpl(int valueCount, int bitsPerValue) {
|
protected MutableImpl(int valueCount, int bitsPerValue) {
|
||||||
super(valueCount, bitsPerValue);
|
super(valueCount, bitsPerValue);
|
||||||
|
@ -283,13 +466,15 @@ public class PackedInts {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int getFormat() {
|
protected Format getFormat() {
|
||||||
return PACKED;
|
return Format.PACKED;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void save(DataOutput out) throws IOException {
|
public void save(DataOutput out) throws IOException {
|
||||||
Writer writer = getWriterByFormat(out, valueCount, bitsPerValue, getFormat());
|
Writer writer = getWriterNoHeader(out, getFormat(),
|
||||||
|
valueCount, bitsPerValue, DEFAULT_BUFFER_SIZE);
|
||||||
|
writer.writeHeader();
|
||||||
for (int i = 0; i < valueCount; ++i) {
|
for (int i = 0; i < valueCount; ++i) {
|
||||||
writer.add(get(i));
|
writer.add(get(i));
|
||||||
}
|
}
|
||||||
|
@ -302,121 +487,209 @@ public class PackedInts {
|
||||||
*/
|
*/
|
||||||
public static abstract class Writer {
|
public static abstract class Writer {
|
||||||
protected final DataOutput out;
|
protected final DataOutput out;
|
||||||
protected final int bitsPerValue;
|
|
||||||
protected final int valueCount;
|
protected final int valueCount;
|
||||||
|
protected final int bitsPerValue;
|
||||||
|
|
||||||
protected Writer(DataOutput out, int valueCount, int bitsPerValue)
|
protected Writer(DataOutput out, int valueCount, int bitsPerValue)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
assert bitsPerValue <= 64;
|
assert bitsPerValue <= 64;
|
||||||
|
assert valueCount >= 0 || valueCount == -1;
|
||||||
this.out = out;
|
this.out = out;
|
||||||
this.valueCount = valueCount;
|
this.valueCount = valueCount;
|
||||||
this.bitsPerValue = bitsPerValue;
|
this.bitsPerValue = bitsPerValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeHeader() throws IOException {
|
||||||
|
assert valueCount != -1;
|
||||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||||
out.writeVInt(bitsPerValue);
|
out.writeVInt(bitsPerValue);
|
||||||
out.writeVInt(valueCount);
|
out.writeVInt(valueCount);
|
||||||
out.writeVInt(getFormat());
|
out.writeVInt(getFormat().getId());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract int getFormat();
|
/** The format used to serialize values. */
|
||||||
|
protected abstract PackedInts.Format getFormat();
|
||||||
|
|
||||||
|
/** Add a value to the stream. */
|
||||||
public abstract void add(long v) throws IOException;
|
public abstract void add(long v) throws IOException;
|
||||||
|
|
||||||
|
/** The number of bits per value. */
|
||||||
|
public final int bitsPerValue() {
|
||||||
|
return bitsPerValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Perform end-of-stream operations. */
|
||||||
public abstract void finish() throws IOException;
|
public abstract void finish() throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current ord in the stream (number of values that have been
|
||||||
|
* written so far minus one).
|
||||||
|
*/
|
||||||
|
public abstract int ord();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve PackedInt data from the DataInput and return a packed int
|
* Expert: Restore a {@link Reader} from a stream without reading metadata at
|
||||||
* structure based on it.
|
* the beginning of the stream. This method is useful to restore data from
|
||||||
|
* streams which have been created using
|
||||||
|
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||||
*
|
*
|
||||||
* @param in positioned at the beginning of a stored packed int structure.
|
* @param in the stream to read data from, positioned at the beginning of the packed values
|
||||||
* @return a read only random access capable array of positive integers.
|
* @param format the format used to serialize the values
|
||||||
* @throws IOException if the structure could not be retrieved.
|
* @param version the version used to serialize the data
|
||||||
|
* @param valueCount how many values the stream holds
|
||||||
|
* @param bitsPerValue the number of bits per value
|
||||||
|
* @return a Reader
|
||||||
|
* @throws IOException
|
||||||
|
* @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static Reader getReader(DataInput in) throws IOException {
|
public static Reader getReaderNoHeader(DataInput in, Format format, int version,
|
||||||
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
int valueCount, int bitsPerValue) throws IOException {
|
||||||
final int bitsPerValue = in.readVInt();
|
|
||||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
|
||||||
final int valueCount = in.readVInt();
|
|
||||||
final int format = in.readVInt();
|
|
||||||
|
|
||||||
switch (format) {
|
switch (format) {
|
||||||
|
case PACKED_SINGLE_BLOCK:
|
||||||
|
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
|
||||||
case PACKED:
|
case PACKED:
|
||||||
switch (bitsPerValue) {
|
switch (bitsPerValue) {
|
||||||
case 8:
|
case 8:
|
||||||
return new Direct8(in, valueCount);
|
return new Direct8(in, valueCount);
|
||||||
case 16:
|
case 16:
|
||||||
return new Direct16(in, valueCount);
|
return new Direct16(in, valueCount);
|
||||||
case 24:
|
|
||||||
return new Packed8ThreeBlocks(in, valueCount);
|
|
||||||
case 32:
|
case 32:
|
||||||
return new Direct32(in, valueCount);
|
return new Direct32(in, valueCount);
|
||||||
case 48:
|
|
||||||
return new Packed16ThreeBlocks(in, valueCount);
|
|
||||||
case 64:
|
case 64:
|
||||||
return new Direct64(in, valueCount);
|
return new Direct64(in, valueCount);
|
||||||
default:
|
case 24:
|
||||||
return new Packed64(in, valueCount, bitsPerValue);
|
if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
|
||||||
|
return new Packed8ThreeBlocks(in, valueCount);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 48:
|
||||||
|
if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
|
||||||
|
return new Packed16ThreeBlocks(in, valueCount);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
case PACKED_SINGLE_BLOCK:
|
return new Packed64(in, valueCount, bitsPerValue);
|
||||||
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
|
|
||||||
default:
|
default:
|
||||||
throw new AssertionError("Unknwown Writer format: " + format);
|
throw new AssertionError("Unknwown Writer format: " + format);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Restore a {@link Reader} from a stream.
|
||||||
|
*
|
||||||
|
* @param in the stream to read data from
|
||||||
|
* @return a Reader
|
||||||
|
* @throws IOException
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public static Reader getReader(DataInput in) throws IOException {
|
||||||
|
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||||
|
final int bitsPerValue = in.readVInt();
|
||||||
|
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||||
|
final int valueCount = in.readVInt();
|
||||||
|
final Format format = Format.byId(in.readVInt());
|
||||||
|
|
||||||
|
return getReaderNoHeader(in, format, version, valueCount, bitsPerValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: Restore a {@link ReaderIterator} from a stream without reading
|
||||||
|
* metadata at the beginning of the stream. This method is useful to restore
|
||||||
|
* data from streams which have been created using
|
||||||
|
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||||
|
*
|
||||||
|
* @param in the stream to read data from, positioned at the beginning of the packed values
|
||||||
|
* @param format the format used to serialize the values
|
||||||
|
* @param version the version used to serialize the data
|
||||||
|
* @param valueCount how many values the stream holds
|
||||||
|
* @param bitsPerValue the number of bits per value
|
||||||
|
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
|
||||||
|
* @return a ReaderIterator
|
||||||
|
* @throws IOException
|
||||||
|
* @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version,
|
||||||
|
int valueCount, int bitsPerValue, int mem) throws IOException {
|
||||||
|
return new PackedReaderIterator(format, valueCount, bitsPerValue, in, mem);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve PackedInts as a {@link ReaderIterator}
|
* Retrieve PackedInts as a {@link ReaderIterator}
|
||||||
* @param in positioned at the beginning of a stored packed int structure.
|
* @param in positioned at the beginning of a stored packed int structure.
|
||||||
|
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
|
||||||
* @return an iterator to access the values
|
* @return an iterator to access the values
|
||||||
* @throws IOException if the structure could not be retrieved.
|
* @throws IOException if the structure could not be retrieved.
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
|
public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
|
||||||
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||||
final int bitsPerValue = in.readVInt();
|
final int bitsPerValue = in.readVInt();
|
||||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||||
final int valueCount = in.readVInt();
|
final int valueCount = in.readVInt();
|
||||||
final int format = in.readVInt();
|
final Format format = Format.byId(in.readVInt());
|
||||||
switch (format) {
|
return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem);
|
||||||
case PACKED:
|
|
||||||
return new PackedReaderIterator(valueCount, bitsPerValue, in);
|
|
||||||
case PACKED_SINGLE_BLOCK:
|
|
||||||
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
|
|
||||||
default:
|
|
||||||
throw new AssertionError("Unknwown Writer format: " + format);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve PackedInts.Reader that does not load values
|
* Expert: Construct a direct {@link Reader} from a stream without reading
|
||||||
* into RAM but rather accesses all values via the
|
* metadata at the beginning of the stream. This method is useful to restore
|
||||||
* provided IndexInput.
|
* data from streams which have been created using
|
||||||
* @param in positioned at the beginning of a stored packed int structure.
|
* {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||||
* @return a Reader to access the values
|
* </p><p>
|
||||||
* @throws IOException if the structure could not be retrieved.
|
* The returned reader will have very little memory overhead, but every call
|
||||||
|
* to {@link Reader#get(int)} is likely to perform a disk seek.
|
||||||
|
*
|
||||||
|
* @param in the stream to read data from
|
||||||
|
* @param format the format used to serialize the values
|
||||||
|
* @param version the version used to serialize the data
|
||||||
|
* @param valueCount how many values the stream holds
|
||||||
|
* @param bitsPerValue the number of bits per value
|
||||||
|
* @return a direct Reader
|
||||||
|
* @throws IOException
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static Reader getDirectReader(IndexInput in) throws IOException {
|
public static Reader getDirectReaderNoHeader(IndexInput in, Format format,
|
||||||
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
int version, int valueCount, int bitsPerValue) throws IOException {
|
||||||
final int bitsPerValue = in.readVInt();
|
|
||||||
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
|
||||||
final int valueCount = in.readVInt();
|
|
||||||
final int format = in.readVInt();
|
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case PACKED:
|
case PACKED:
|
||||||
return new DirectPackedReader(bitsPerValue, valueCount, in);
|
return new DirectPackedReader(bitsPerValue, valueCount, in);
|
||||||
case PACKED_SINGLE_BLOCK:
|
case PACKED_SINGLE_BLOCK:
|
||||||
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
|
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
|
||||||
default:
|
default:
|
||||||
throw new AssertionError("Unknwown Writer format: " + format);
|
throw new AssertionError("Unknwown format: " + format);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a direct {@link Reader} from an {@link IndexInput}. This method
|
||||||
|
* is useful to restore data from streams which have been created using
|
||||||
|
* {@link PackedInts#getWriter(DataOutput, int, int, float)}.
|
||||||
|
* </p><p>
|
||||||
|
* The returned reader will have very little memory overhead, but every call
|
||||||
|
* to {@link Reader#get(int)} is likely to perform a disk seek.
|
||||||
|
*
|
||||||
|
* @param in the stream to read data from
|
||||||
|
* @return a direct Reader
|
||||||
|
* @throws IOException
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public static Reader getDirectReader(IndexInput in) throws IOException {
|
||||||
|
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||||
|
final int bitsPerValue = in.readVInt();
|
||||||
|
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
|
||||||
|
final int valueCount = in.readVInt();
|
||||||
|
final Format format = Format.byId(in.readVInt());
|
||||||
|
return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a packed integer array with the given amount of values initialized
|
* Create a packed integer array with the given amount of values initialized
|
||||||
* to 0. The valueCount and the bitsPerValue cannot be changed after creation.
|
* to 0. The valueCount and the bitsPerValue cannot be changed after creation.
|
||||||
* All Mutables known by this factory are kept fully in RAM.
|
* All Mutables known by this factory are kept fully in RAM.
|
||||||
*
|
* </p><p>
|
||||||
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
||||||
* for speed by selecting a faster but potentially less memory-efficient
|
* for speed by selecting a faster but potentially less memory-efficient
|
||||||
* implementation. An <code>acceptableOverheadRatio</code> of
|
* implementation. An <code>acceptableOverheadRatio</code> of
|
||||||
|
@ -433,103 +706,130 @@ public class PackedInts {
|
||||||
*/
|
*/
|
||||||
public static Mutable getMutable(int valueCount,
|
public static Mutable getMutable(int valueCount,
|
||||||
int bitsPerValue, float acceptableOverheadRatio) {
|
int bitsPerValue, float acceptableOverheadRatio) {
|
||||||
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
assert valueCount >= 0;
|
||||||
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
|
||||||
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
|
||||||
|
|
||||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
|
||||||
|
switch (formatAndBits.format) {
|
||||||
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
case PACKED_SINGLE_BLOCK:
|
||||||
return new Direct8(valueCount);
|
return Packed64SingleBlock.create(valueCount, formatAndBits.bitsPerValue);
|
||||||
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
case PACKED:
|
||||||
return new Direct16(valueCount);
|
switch (formatAndBits.bitsPerValue) {
|
||||||
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
case 8:
|
||||||
return new Direct32(valueCount);
|
return new Direct8(valueCount);
|
||||||
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
case 16:
|
||||||
return new Direct64(valueCount);
|
return new Direct16(valueCount);
|
||||||
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
case 32:
|
||||||
return new Packed8ThreeBlocks(valueCount);
|
return new Direct32(valueCount);
|
||||||
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
case 64:
|
||||||
return new Packed16ThreeBlocks(valueCount);
|
return new Direct64(valueCount);
|
||||||
} else {
|
case 24:
|
||||||
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
|
||||||
if (Packed64SingleBlock.isSupported(bpv)) {
|
return new Packed8ThreeBlocks(valueCount);
|
||||||
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
}
|
||||||
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
break;
|
||||||
if (overhead <= acceptableOverhead) {
|
case 48:
|
||||||
return Packed64SingleBlock.create(valueCount, bpv);
|
if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
|
||||||
}
|
return new Packed16ThreeBlocks(valueCount);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
return new Packed64(valueCount, formatAndBits.bitsPerValue);
|
||||||
return new Packed64(valueCount, bitsPerValue);
|
default:
|
||||||
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a packed integer array writer for the given number of values at the
|
* Expert: Create a packed integer array writer for the given output, format,
|
||||||
* given bits/value. Writers append to the given IndexOutput and have very
|
* value count, and number of bits per value.
|
||||||
* low memory overhead.
|
* </p><p>
|
||||||
|
* The resulting stream will be long-aligned. This means that depending on
|
||||||
|
* the format which is used, up to 63 bits will be wasted. An easy way to
|
||||||
|
* make sure that no space is lost is to always use a <code>valueCount</code>
|
||||||
|
* that is a multiple of 64.
|
||||||
|
* </p><p>
|
||||||
|
* This method does not write any metadata to the stream, meaning that it is
|
||||||
|
* your responsibility to store it somewhere else in order to be able to
|
||||||
|
* recover data from the stream later on:
|
||||||
|
* <ul>
|
||||||
|
* <li><code>format</code> (using {@link Format#getId()}),</li>
|
||||||
|
* <li><code>valueCount</code>,</li>
|
||||||
|
* <li><code>bitsPerValue</code>,</li>
|
||||||
|
* <li>{@link #VERSION_CURRENT}.</li>
|
||||||
|
* </ul>
|
||||||
|
* </p><p>
|
||||||
|
* It is possible to start writing values without knowing how many of them you
|
||||||
|
* are actually going to write. To do this, just pass <code>-1</code> as
|
||||||
|
* <code>valueCount</code>. On the other hand, for any positive value of
|
||||||
|
* <code>valueCount</code>, the returned writer will make sure that you don't
|
||||||
|
* write more values than expected and pad the end of stream with zeros in
|
||||||
|
* case you have written fewer than <code>valueCount</code> values when calling
|
||||||
|
* {@link Writer#finish()}.
|
||||||
|
* </p><p>
|
||||||
|
* The <code>mem</code> parameter lets you control how much memory can be used
|
||||||
|
* to buffer changes in memory before flushing to disk. High values of
|
||||||
|
* <code>mem</code> are likely to improve throughput. On the other hand, if
|
||||||
|
* speed is not that important to you, a value of <code>0</code> will use as
|
||||||
|
* little memory as possible and should already offer reasonable throughput.
|
||||||
*
|
*
|
||||||
* Positive values of <code>acceptableOverheadRatio</code> will trade space
|
* @param out the data output
|
||||||
|
* @param format the format to use to serialize the values
|
||||||
|
* @param valueCount the number of values
|
||||||
|
* @param bitsPerValue the number of bits per value
|
||||||
|
* @param mem how much memory (in bytes) can be used to speed up serialization
|
||||||
|
* @return a Writer
|
||||||
|
* @throws IOException
|
||||||
|
* @see PackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int)
|
||||||
|
* @see PackedInts#getReaderNoHeader(DataInput, Format, int, int, int)
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public static Writer getWriterNoHeader(
|
||||||
|
DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) throws IOException {
|
||||||
|
return new PackedWriter(format, out, valueCount, bitsPerValue, mem);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a packed integer array writer for the given output, format, value
|
||||||
|
* count, and number of bits per value.
|
||||||
|
* </p><p>
|
||||||
|
* The resulting stream will be long-aligned. This means that depending on
|
||||||
|
* the format which is used under the hood, up to 63 bits will be wasted.
|
||||||
|
* An easy way to make sure that no space is lost is to always use a
|
||||||
|
* <code>valueCount</code> that is a multiple of 64.
|
||||||
|
* </p><p>
|
||||||
|
* This method writes metadata to the stream, so that the resulting stream is
|
||||||
|
* sufficient to restore a {@link Reader} from it. You don't need to track
|
||||||
|
* <code>valueCount</code> or <code>bitsPerValue</code> by yourself. In case
|
||||||
|
* this is a problem, you should probably look at
|
||||||
|
* {@link #getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||||
|
* </p><p>
|
||||||
|
* The <code>acceptableOverheadRatio</code> parameter controls how
|
||||||
|
* readers that will be restored from this stream trade space
|
||||||
* for speed by selecting a faster but potentially less memory-efficient
|
* for speed by selecting a faster but potentially less memory-efficient
|
||||||
* implementation. An <code>acceptableOverheadRatio</code> of
|
* implementation. An <code>acceptableOverheadRatio</code> of
|
||||||
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
|
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
|
||||||
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
|
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
|
||||||
* that the fastest implementation is selected.
|
* that the fastest implementation is selected. In case you are only interested
|
||||||
|
* in reading this stream sequentially later on, you should probably use
|
||||||
|
* {@link PackedInts#COMPACT}.
|
||||||
*
|
*
|
||||||
* @param out the destination for the produced bits.
|
* @param out the data output
|
||||||
* @param valueCount the number of elements.
|
* @param valueCount the number of values
|
||||||
* @param bitsPerValue the number of bits available for any given value.
|
* @param bitsPerValue the number of bits per value
|
||||||
* @param acceptableOverheadRatio an acceptable overhead ratio per value
|
* @param acceptableOverheadRatio an acceptable overhead ratio per value
|
||||||
* @return a Writer ready for receiving values.
|
* @return a Writer
|
||||||
* @throws IOException if bits could not be written to out.
|
* @throws IOException
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static Writer getWriter(DataOutput out,
|
public static Writer getWriter(DataOutput out,
|
||||||
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
|
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
|
assert valueCount >= 0;
|
||||||
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
|
|
||||||
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
|
|
||||||
|
|
||||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
|
||||||
|
final Writer writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE);
|
||||||
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
writer.writeHeader();
|
||||||
return getWriterByFormat(out, valueCount, 8, PACKED);
|
return writer;
|
||||||
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
|
||||||
return getWriterByFormat(out, valueCount, 16, PACKED);
|
|
||||||
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
|
||||||
return getWriterByFormat(out, valueCount, 32, PACKED);
|
|
||||||
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
|
||||||
return getWriterByFormat(out, valueCount, 64, PACKED);
|
|
||||||
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
|
||||||
return getWriterByFormat(out, valueCount, 24, PACKED);
|
|
||||||
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
|
||||||
return getWriterByFormat(out, valueCount, 48, PACKED);
|
|
||||||
} else {
|
|
||||||
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
|
||||||
if (Packed64SingleBlock.isSupported(bpv)) {
|
|
||||||
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
|
||||||
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
|
||||||
if (overhead <= acceptableOverhead) {
|
|
||||||
return getWriterByFormat(out, valueCount, bpv, PACKED_SINGLE_BLOCK);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return getWriterByFormat(out, valueCount, bitsPerValue, PACKED);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Writer getWriterByFormat(DataOutput out,
|
|
||||||
int valueCount, int bitsPerValue, int format) throws IOException {
|
|
||||||
switch (format) {
|
|
||||||
case PACKED:
|
|
||||||
return new PackedWriter(out, valueCount, bitsPerValue);
|
|
||||||
case PACKED_SINGLE_BLOCK:
|
|
||||||
return new Packed64SingleBlockWriter(out, valueCount, bitsPerValue);
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException("Unknown format " + format);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns how many bits are required to hold values up
|
/** Returns how many bits are required to hold values up
|
||||||
|
|
|
@ -17,76 +17,71 @@ package org.apache.lucene.util.packed;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
import java.io.EOFException;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.util.LongsRef;
|
||||||
|
|
||||||
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
|
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
|
||||||
private long pending;
|
|
||||||
private int pendingBitsLeft;
|
|
||||||
private int position = -1;
|
|
||||||
|
|
||||||
// masks[n-1] masks for bottom n bits
|
final PackedInts.Format format;
|
||||||
private final long[] masks;
|
final BulkOperation bulkOperation;
|
||||||
|
final long[] nextBlocks;
|
||||||
|
final LongsRef nextValues;
|
||||||
|
final int iterations;
|
||||||
|
int position;
|
||||||
|
|
||||||
public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in) {
|
PackedReaderIterator(PackedInts.Format format, int valueCount, int bitsPerValue, DataInput in, int mem) {
|
||||||
super(valueCount, bitsPerValue, in);
|
super(valueCount, bitsPerValue, in);
|
||||||
|
this.format = format;
|
||||||
masks = new long[bitsPerValue];
|
bulkOperation = BulkOperation.of(format, bitsPerValue);
|
||||||
|
iterations = bulkOperation.computeIterations(valueCount, mem);
|
||||||
long v = 1;
|
assert iterations > 0;
|
||||||
for (int i = 0; i < bitsPerValue; i++) {
|
nextBlocks = new long[iterations * bulkOperation.blocks()];
|
||||||
v *= 2;
|
nextValues = new LongsRef(new long[iterations * bulkOperation.values()], 0, 0);
|
||||||
masks[i] = v - 1;
|
assert iterations * bulkOperation.values() == nextValues.longs.length;
|
||||||
}
|
assert iterations * bulkOperation.blocks() == nextBlocks.length;
|
||||||
|
nextValues.offset = nextValues.longs.length;
|
||||||
|
position = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
public long next() throws IOException {
|
@Override
|
||||||
if (pendingBitsLeft == 0) {
|
public LongsRef next(int count) throws IOException {
|
||||||
pending = in.readLong();
|
assert nextValues.length >= 0;
|
||||||
pendingBitsLeft = 64;
|
assert count > 0;
|
||||||
}
|
assert nextValues.offset + nextValues.length <= nextValues.longs.length;
|
||||||
|
|
||||||
final long result;
|
nextValues.offset += nextValues.length;
|
||||||
if (pendingBitsLeft >= bitsPerValue) { // not split
|
|
||||||
result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
|
final int remaining = valueCount - position - 1;
|
||||||
pendingBitsLeft -= bitsPerValue;
|
if (remaining <= 0) {
|
||||||
} else { // split
|
throw new EOFException();
|
||||||
final int bits1 = bitsPerValue - pendingBitsLeft;
|
|
||||||
final long result1 = (pending & masks[pendingBitsLeft-1]) << bits1;
|
|
||||||
pending = in.readLong();
|
|
||||||
final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
|
|
||||||
pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
|
|
||||||
result = result1 | result2;
|
|
||||||
}
|
}
|
||||||
|
count = Math.min(remaining, count);
|
||||||
++position;
|
|
||||||
return result;
|
if (nextValues.offset == nextValues.longs.length) {
|
||||||
|
final int remainingBlocks = format.nblocks(bitsPerValue, remaining);
|
||||||
|
final int blocksToRead = Math.min(remainingBlocks, nextBlocks.length);
|
||||||
|
for (int i = 0; i < blocksToRead; ++i) {
|
||||||
|
nextBlocks[i] = in.readLong();
|
||||||
|
}
|
||||||
|
for (int i = blocksToRead; i < nextBlocks.length; ++i) {
|
||||||
|
nextBlocks[i] = 0L;
|
||||||
|
}
|
||||||
|
|
||||||
|
bulkOperation.get(nextBlocks, 0, nextValues.longs, 0, iterations);
|
||||||
|
nextValues.offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
nextValues.length = Math.min(nextValues.longs.length - nextValues.offset, count);
|
||||||
|
position += nextValues.length;
|
||||||
|
return nextValues;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public int ord() {
|
public int ord() {
|
||||||
return position;
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
public long advance(final int ord) throws IOException{
|
|
||||||
assert ord < valueCount : "ord must be less than valueCount";
|
|
||||||
assert ord > position : "ord must be greater than the current position";
|
|
||||||
final long bits = (long) bitsPerValue;
|
|
||||||
final int posToSkip = ord - 1 - position;
|
|
||||||
final long bitsToSkip = (bits * (long)posToSkip);
|
|
||||||
if (bitsToSkip < pendingBitsLeft) { // enough bits left - no seek required
|
|
||||||
pendingBitsLeft -= bitsToSkip;
|
|
||||||
} else {
|
|
||||||
final long skip = bitsToSkip-pendingBitsLeft;
|
|
||||||
final long closestByte = (skip >> 6) << 3;
|
|
||||||
if (closestByte != 0) { // need to seek
|
|
||||||
final long filePointer = in.getFilePointer();
|
|
||||||
in.seek(filePointer + closestByte);
|
|
||||||
}
|
|
||||||
pending = in.readLong();
|
|
||||||
pendingBitsLeft = 64 - (int)(skip % 64);
|
|
||||||
}
|
|
||||||
position = ord-1;
|
|
||||||
return next();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,101 +19,79 @@ package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
|
||||||
|
import java.io.EOFException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
// Packs high order byte first, to match
|
// Packs high order byte first, to match
|
||||||
// IndexOutput.writeInt/Long/Short byte order
|
// IndexOutput.writeInt/Long/Short byte order
|
||||||
|
|
||||||
/**
|
final class PackedWriter extends PackedInts.Writer {
|
||||||
* Generic writer for space-optimal packed values. The resulting bits can be
|
|
||||||
* used directly by Packed32, Packed64 and PackedDirect* and will always be
|
|
||||||
* long-aligned.
|
|
||||||
*/
|
|
||||||
|
|
||||||
class PackedWriter extends PackedInts.Writer {
|
boolean finished;
|
||||||
private long pending;
|
final PackedInts.Format format;
|
||||||
private int pendingBitPos;
|
final BulkOperation bulkOperation;
|
||||||
|
final long[] nextBlocks;
|
||||||
|
final long[] nextValues;
|
||||||
|
final int iterations;
|
||||||
|
int off;
|
||||||
|
int written;
|
||||||
|
|
||||||
// masks[n-1] masks for bottom n bits
|
PackedWriter(PackedInts.Format format, DataOutput out, int valueCount, int bitsPerValue, int mem)
|
||||||
private final long[] masks;
|
throws IOException {
|
||||||
private int written = 0;
|
|
||||||
|
|
||||||
public PackedWriter(DataOutput out, int valueCount, int bitsPerValue)
|
|
||||||
throws IOException {
|
|
||||||
super(out, valueCount, bitsPerValue);
|
super(out, valueCount, bitsPerValue);
|
||||||
|
this.format = format;
|
||||||
pendingBitPos = 64;
|
bulkOperation = BulkOperation.of(format, bitsPerValue);
|
||||||
masks = new long[bitsPerValue - 1];
|
iterations = bulkOperation.computeIterations(valueCount, mem);
|
||||||
|
nextBlocks = new long[iterations * bulkOperation.blocks()];
|
||||||
long v = 1;
|
nextValues = new long[iterations * bulkOperation.values()];
|
||||||
for (int i = 0; i < bitsPerValue - 1; i++) {
|
off = 0;
|
||||||
v *= 2;
|
written = 0;
|
||||||
masks[i] = v - 1;
|
finished = false;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int getFormat() {
|
protected PackedInts.Format getFormat() {
|
||||||
return PackedInts.PACKED;
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Do not call this after finish
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public void add(long v) throws IOException {
|
public void add(long v) throws IOException {
|
||||||
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
|
assert v >= 0 && v <= PackedInts.maxValue(bitsPerValue);
|
||||||
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
|
assert !finished;
|
||||||
assert v >= 0;
|
if (valueCount != -1 && written >= valueCount) {
|
||||||
//System.out.println(" packedw add v=" + v + " pendingBitPos=" + pendingBitPos);
|
throw new EOFException("Writing past end of stream");
|
||||||
|
|
||||||
// TODO
|
|
||||||
if (pendingBitPos >= bitsPerValue) {
|
|
||||||
// not split
|
|
||||||
|
|
||||||
// write-once, so we can |= w/o first masking to 0s
|
|
||||||
pending |= v << (pendingBitPos - bitsPerValue);
|
|
||||||
if (pendingBitPos == bitsPerValue) {
|
|
||||||
// flush
|
|
||||||
out.writeLong(pending);
|
|
||||||
pending = 0;
|
|
||||||
pendingBitPos = 64;
|
|
||||||
} else {
|
|
||||||
pendingBitPos -= bitsPerValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
// split
|
|
||||||
|
|
||||||
// write top pendingBitPos bits of value into bottom bits of pending
|
|
||||||
pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1];
|
|
||||||
//System.out.println(" part1 (v >> " + (bitsPerValue - pendingBitPos) + ") & " + masks[pendingBitPos-1]);
|
|
||||||
|
|
||||||
// flush
|
|
||||||
out.writeLong(pending);
|
|
||||||
|
|
||||||
// write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending
|
|
||||||
pendingBitPos = 64 - bitsPerValue + pendingBitPos;
|
|
||||||
//System.out.println(" part2 v << " + pendingBitPos);
|
|
||||||
pending = (v << pendingBitPos);
|
|
||||||
}
|
}
|
||||||
written++;
|
nextValues[off++] = v;
|
||||||
|
if (off == nextValues.length) {
|
||||||
|
flush(nextValues.length);
|
||||||
|
off = 0;
|
||||||
|
}
|
||||||
|
++written;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finish() throws IOException {
|
public void finish() throws IOException {
|
||||||
while (written < valueCount) {
|
assert !finished;
|
||||||
add(0L); // Auto flush
|
if (valueCount != -1) {
|
||||||
|
while (written < valueCount) {
|
||||||
|
add(0L);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
flush(off);
|
||||||
|
finished = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (pendingBitPos != 64) {
|
private void flush(int nvalues) throws IOException {
|
||||||
out.writeLong(pending);
|
bulkOperation.set(nextBlocks, 0, nextValues, 0, iterations);
|
||||||
|
final int blocks = format.nblocks(bitsPerValue, nvalues);
|
||||||
|
for (int i = 0; i < blocks; ++i) {
|
||||||
|
out.writeLong(nextBlocks[i]);
|
||||||
}
|
}
|
||||||
|
off = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public int ord() {
|
||||||
return "PackedWriter(written " + written + "/" + valueCount + " with "
|
return written - 1;
|
||||||
+ bitsPerValue + " bits/value)";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,264 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from fractions import gcd
|
||||||
|
|
||||||
|
"""Code generation for bulk operations"""
|
||||||
|
|
||||||
|
PACKED_64_SINGLE_BLOCK_BPV = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
|
||||||
|
OUTPUT_FILE = "BulkOperation.java"
|
||||||
|
HEADER = """// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.EnumMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Efficient sequential read/write of packed integers.
|
||||||
|
*/
|
||||||
|
abstract class BulkOperation {
|
||||||
|
|
||||||
|
static final EnumMap<PackedInts.Format, BulkOperation[]> BULK_OPERATIONS = new EnumMap<PackedInts.Format, BulkOperation[]>(PackedInts.Format.class);
|
||||||
|
|
||||||
|
public static BulkOperation of(PackedInts.Format format, int bitsPerValue) {
|
||||||
|
assert bitsPerValue > 0 && bitsPerValue <= 64;
|
||||||
|
BulkOperation[] ops = BULK_OPERATIONS.get(format);
|
||||||
|
if (ops == null || ops[bitsPerValue] == null) {
|
||||||
|
throw new IllegalArgumentException("format: " + format + ", bitsPerValue: " + bitsPerValue);
|
||||||
|
}
|
||||||
|
return ops[bitsPerValue];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For every number of bits per value, there is a minimum number of
|
||||||
|
* blocks (b) / values (v) you need to write in order to reach the next block
|
||||||
|
* boundary:
|
||||||
|
* - 16 bits per value -> b=1, v=4
|
||||||
|
* - 24 bits per value -> b=3, v=8
|
||||||
|
* - 50 bits per value -> b=25, v=32
|
||||||
|
* - 63 bits per value -> b=63, v = 64
|
||||||
|
* - ...
|
||||||
|
*
|
||||||
|
* A bulk read consists in copying <code>iterations*v</code> values that are
|
||||||
|
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
|
||||||
|
* (higher values of <code>iterations</code> are likely to yield a better
|
||||||
|
* throughput) => this requires n * (b + v) longs in memory.
|
||||||
|
*
|
||||||
|
* This method computes <code>iterations</code> as
|
||||||
|
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
|
||||||
|
*/
|
||||||
|
public final int computeIterations(int valueCount, int ramBudget) {
|
||||||
|
final int iterations = (ramBudget >>> 3) / (blocks() + values());
|
||||||
|
if (iterations == 0) {
|
||||||
|
// at least 1
|
||||||
|
return 1;
|
||||||
|
} else if ((iterations - 1) * blocks() >= valueCount) {
|
||||||
|
// don't allocate for more than the size of the reader
|
||||||
|
return (int) Math.ceil((double) valueCount / values());
|
||||||
|
} else {
|
||||||
|
return iterations;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The minimum number of blocks required to perform a bulk get/set.
|
||||||
|
*/
|
||||||
|
public abstract int blocks();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of values that can be stored in <code>blocks()</code> blocks.
|
||||||
|
*/
|
||||||
|
public abstract int values();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get <code>n * values()</code> values from <code>n * blocks()</code> blocks.
|
||||||
|
*/
|
||||||
|
public abstract void get(long[] blocks, int blockIndex, long[] values, int valuesIndex, int iterations);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set <code>n * values()</code> values into <code>n * blocks()</code> blocks.
|
||||||
|
*/
|
||||||
|
public abstract void set(long[] blocks, int blockIndex, long[] values, int valuesIndex, int iterations);
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
FOOTER = "}"
|
||||||
|
|
||||||
|
def packed64singleblock(bpv, f):
    """Write the Java source of ``Packed64SingleBlockBulkOperation<bpv>`` to ``f``.

    The generated class reports ``blocks() == 1`` and ``values() == 64 // bpv``
    and contains fully unrolled ``get``/``set`` bodies in which value ``i`` of a
    block sits at bit offset ``i * bpv``.

    bpv -- number of bits per value (an int).
    f   -- writable text stream receiving the generated Java code.
    """
    # Floor division keeps `values` an int on Python 2 and Python 3 alike; it is
    # used both as a loop bound and as a %d argument.
    values = 64 // bpv
    mask = (1 << bpv) - 1

    f.write("\n static final class Packed64SingleBlockBulkOperation%d extends BulkOperation {\n\n" %bpv)
    f.write(" public int blocks() {\n")
    f.write(" return 1;\n")
    f.write(" }\n\n")
    f.write(" public int values() {\n")
    f.write(" return %d;\n" %values)
    f.write(" }\n\n")

    # get(): one block in, `values` longs out per iteration.
    f.write(" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
    f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
    f.write(" assert vi + iterations * values() <= values.length;\n")
    f.write(" for (int i = 0; i < iterations; ++i) {\n")
    f.write(" final long block = blocks[bi++];\n")
    for i in range(values):
        if i == 0:
            # lowest value: no shift needed
            f.write(" values[vi++] = block & %dL;\n" %mask)
        elif i == values - 1:
            # highest value: emitted without a mask; this presumably relies on
            # any unused top bits of the block being zero -- TODO confirm
            f.write(" values[vi++] = block >>> %d;\n" %(i * bpv))
        else:
            f.write(" values[vi++] = (block >>> %d) & %dL;\n" %(i * bpv, mask))
    f.write(" }\n")
    f.write(" }\n\n")

    # set(): `values` longs in, one block out per iteration, built as a single
    # OR-expression that is terminated after the last operand.
    f.write(" public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
    f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
    f.write(" assert vi + iterations * values() <= values.length;\n")
    f.write(" for (int i = 0; i < iterations; ++i) {\n")
    for i in range(values):
        if i == 0:
            f.write(" blocks[bi++] = values[vi++]")
        else:
            f.write(" | (values[vi++] << %d)" %(i * bpv))
        if i == values - 1:
            f.write(";\n")
    f.write(" }\n")
    f.write(" }\n")

    f.write(" }\n")
|
||||||
|
|
||||||
|
def packed64(bpv, f):
    """Write the Java source of ``Packed64BulkOperation<bpv>`` to ``f``.

    Values are laid out back to back, high-order bits first, so a value may
    straddle two consecutive 64-bit blocks. The generated ``get``/``set``
    bodies are fully unrolled over one group of ``blocks`` blocks holding
    ``values`` values, where ``values * bpv == 64 * blocks``.

    bpv -- number of bits per value (an int, 1..64).
    f   -- writable text stream receiving the generated Java code.
    """
    # Start from bpv blocks (always a whole number of values) and halve both
    # counts while they stay integral. Floor division keeps everything an int
    # on Python 3 as well as Python 2.
    blocks = bpv
    values = blocks * 64 // bpv
    while blocks % 2 == 0 and values % 2 == 0:
        blocks //= 2
        values //= 2
    assert values * bpv == 64 * blocks, "%d values, %d blocks, %d bits per value" %(values, blocks, bpv)
    mask = (1 << bpv) - 1

    f.write(" static final class Packed64BulkOperation%d extends BulkOperation {\n\n" %bpv)
    f.write(" public int blocks() {\n")
    f.write(" return %d;\n" %blocks)
    f.write(" }\n\n")
    f.write(" public int values() {\n")
    f.write(" return %d;\n" %values)
    f.write(" }\n\n")

    if bpv == 64:
        # One value per block: both directions are plain array copies.
        # set() must read `values` at vi and write `blocks` at bi; the two
        # offsets were previously swapped in the emitted arraycopy call.
        f.write(""" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {
 System.arraycopy(blocks, bi, values, vi, iterations);
 }

 public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {
 System.arraycopy(values, vi, blocks, bi, iterations);
 }
 }
""")
        return

    # get(): decode `values` longs from `blocks` blocks per iteration.
    f.write(" public void get(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
    f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
    f.write(" assert vi + iterations * values() <= values.length;\n")
    f.write(" for (int i = 0; i < iterations; ++i) {\n")
    for i in range(0, values):
        block_offset = i * bpv // 64
        bit_offset = (i * bpv) % 64
        if bit_offset == 0:
            # start of block
            f.write(" final long block%d = blocks[bi++];\n" %block_offset);
            f.write(" values[vi++] = block%d >>> %d;\n" %(block_offset, 64 - bpv))
        elif bit_offset + bpv == 64:
            # end of block
            f.write(" values[vi++] = block%d & %dL;\n" %(block_offset, mask))
        elif bit_offset + bpv < 64:
            # middle of block
            f.write(" values[vi++] = (block%d >>> %d) & %dL;\n" %(block_offset, 64 - bit_offset - bpv, mask))
        else:
            # value spans across 2 blocks: low bits of the current block
            # become the high part, top bits of the next block the low part
            mask1 = (1 << (64 - bit_offset)) -1
            shift1 = bit_offset + bpv - 64
            shift2 = 64 - shift1
            f.write(" final long block%d = blocks[bi++];\n" %(block_offset + 1));
            f.write(" values[vi++] = ((block%d & %dL) << %d) | (block%d >>> %d);\n" %(block_offset, mask1, shift1, block_offset + 1, shift2))
    f.write(" }\n")
    f.write(" }\n\n")

    # set(): encode `values` longs into `blocks` blocks per iteration; each
    # block is emitted as one OR-expression.
    f.write(" public void set(long[] blocks, int bi, long[] values, int vi, int iterations) {\n")
    f.write(" assert bi + iterations * blocks() <= blocks.length;\n")
    f.write(" assert vi + iterations * values() <= values.length;\n")
    f.write(" for (int i = 0; i < iterations; ++i) {\n")
    for i in range(0, values):
        bit_offset = (i * bpv) % 64
        if bit_offset == 0:
            # start of block
            f.write(" blocks[bi++] = (values[vi++] << %d)" %(64 - bpv))
        elif bit_offset + bpv == 64:
            # end of block
            f.write(" | values[vi++];\n")
        elif bit_offset + bpv < 64:
            # inside a block
            f.write(" | (values[vi++] << %d)" %(64 - bit_offset - bpv))
        else:
            # value spans across 2 blocks: finish the current block with the
            # value's high bits, then open the next block with its low bits
            right_bits = bit_offset + bpv - 64
            f.write(" | (values[vi] >>> %d);\n" %right_bits)
            f.write(" blocks[bi++] = (values[vi++] << %d)" %(64 - right_bits))
    f.write(" }\n")
    f.write(" }\n")

    f.write(" }\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Generates OUTPUT_FILE: the shared HEADER, a static initializer that
    # registers one BulkOperation per (format, bitsPerValue), the generated
    # operation classes themselves, then FOOTER.
    # `with` closes the output file even if generation fails part-way.
    with open(OUTPUT_FILE, 'w') as f:
        f.write(HEADER)
        f.write(" static {\n")
        # Each emitted statement ends with a newline so the generated
        # initializer has one statement per line.
        f.write(" BULK_OPERATIONS.put(PackedInts.Format.PACKED, new BulkOperation[65]);\n")
        for bpv in range(1, 65):
            f.write(" BULK_OPERATIONS.get(PackedInts.Format.PACKED)[%d] = new Packed64BulkOperation%d();\n" %(bpv, bpv))
        f.write(" BULK_OPERATIONS.put(PackedInts.Format.PACKED_SINGLE_BLOCK, new BulkOperation[65]);\n")
        for bpv in PACKED_64_SINGLE_BLOCK_BPV:
            f.write(" BULK_OPERATIONS.get(PackedInts.Format.PACKED_SINGLE_BLOCK)[%d] = new Packed64SingleBlockBulkOperation%d();\n" %(bpv, bpv))
        f.write(" }\n")
        # PACKED supports every width from 1 to 64 bits.
        for bpv in range(1, 65):
            packed64(bpv, f)
        # PACKED_SINGLE_BLOCK only exists for the listed widths.
        for bpv in PACKED_64_SINGLE_BLOCK_BPV:
            packed64singleblock(bpv,f)
        f.write(FOOTER)
|
|
@ -0,0 +1,175 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
HEADER="""// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
TYPES = {8: "byte", 16: "short", 32: "int", 64: "long"}
|
||||||
|
MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
|
||||||
|
CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Writes one Direct<bpv>.java per entry of TYPES: a class backed by a
    # primitive array of the matching Java type, with single-value and bulk
    # get/set, fill, clear and array accessors.
    for bpv in TYPES.keys():
        # `with` closes each generated file even if a write fails part-way.
        with open("Direct%d.java" %bpv, 'w') as f:
            f.write(HEADER)
            f.write("""/**
 * Direct wrapping of %d-bits values to a backing array.
 * @lucene.internal
 */\n""" %bpv)
            f.write("final class Direct%d extends PackedInts.MutableImpl {\n" %bpv)
            f.write(" final %s[] values;\n\n" %TYPES[bpv])

            f.write(" Direct%d(int valueCount) {\n" %bpv)
            f.write(" super(valueCount, %d);\n" %bpv)
            f.write(" values = new %s[valueCount];\n" %TYPES[bpv])
            f.write(" }\n\n")

            f.write(" Direct%d(DataInput in, int valueCount) throws IOException {\n" %bpv)
            f.write(" this(valueCount);\n")
            f.write(" for (int i = 0; i < valueCount; ++i) {\n")
            f.write(" values[i] = in.read%s();\n" %TYPES[bpv].title())
            f.write(" }\n")
            if bpv != 64:
                # The generated constructor reads and discards extra values
                # until a multiple of (64 // bpv) values has been consumed.
                # Floor division keeps the count an int on Python 3.
                values_per_long = 64 // bpv
                f.write(" final int mod = valueCount %% %d;\n" %values_per_long)
                f.write(" if (mod != 0) {\n")
                f.write(" for (int i = mod; i < %d; ++i) {\n" %values_per_long)
                f.write(" in.read%s();\n" %TYPES[bpv].title())
                f.write(" }\n")
                f.write(" }\n")
            f.write(" }\n")

            # Single-value accessors: MASKS widens the stored value to an
            # unsigned long on read, CASTS narrows the long on write.
            f.write("""
 @Override
 public long get(final int index) {
 return values[index]%s;
 }

 public void set(final int index, final long value) {
 values[index] = %s(value);
 }

 public long ramBytesUsed() {
 return RamUsageEstimator.sizeOf(values);
 }

 public void clear() {
 Arrays.fill(values, %s0L);
 }

 @Override
 public Object getArray() {
 return values;
 }

 @Override
 public boolean hasArray() {
 return true;
 }
""" %(MASKS[bpv], CASTS[bpv], CASTS[bpv]))

            if bpv == 64:
                # long[] backing array: bulk operations are plain array copies.
                f.write("""
 @Override
 public int get(int index, long[] arr, int off, int len) {
 assert len > 0 : "len must be > 0 (got " + len + ")";
 assert index >= 0 && index < valueCount;
 assert off + len <= arr.length;

 final int gets = Math.min(valueCount - index, len);
 System.arraycopy(values, index, arr, off, gets);
 return gets;
 }

 public int set(int index, long[] arr, int off, int len) {
 assert len > 0 : "len must be > 0 (got " + len + ")";
 assert index >= 0 && index < valueCount;
 assert off + len <= arr.length;

 final int sets = Math.min(valueCount - index, len);
 System.arraycopy(arr, off, values, index, sets);
 return sets;
 }

 @Override
 public void fill(int fromIndex, int toIndex, long val) {
 Arrays.fill(values, fromIndex, toIndex, val);
 }
""")
            else:
                # Narrower backing arrays: bulk operations convert element by
                # element using the same mask / cast as the single accessors.
                f.write("""
 @Override
 public int get(int index, long[] arr, int off, int len) {
 assert len > 0 : "len must be > 0 (got " + len + ")";
 assert index >= 0 && index < valueCount;
 assert off + len <= arr.length;

 final int gets = Math.min(valueCount - index, len);
 for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
 arr[o] = values[i]%s;
 }
 return gets;
 }

 public int set(int index, long[] arr, int off, int len) {
 assert len > 0 : "len must be > 0 (got " + len + ")";
 assert index >= 0 && index < valueCount;
 assert off + len <= arr.length;

 final int sets = Math.min(valueCount - index, len);
 for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
 values[i] = %sarr[o];
 }
 return sets;
 }

 @Override
 public void fill(int fromIndex, int toIndex, long val) {
 assert val == (val%s);
 Arrays.fill(values, fromIndex, toIndex, %sval);
 }
""" %(MASKS[bpv], CASTS[bpv], MASKS[bpv], CASTS[bpv]))

            f.write("}\n")
|
|
@ -58,12 +58,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
|
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static float overheadPerValue(int bitsPerValue) {
|
|
||||||
int valuesPerBlock = 64 / bitsPerValue;
|
|
||||||
int overhead = 64 %% bitsPerValue;
|
|
||||||
return (float) overhead / valuesPerBlock;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
|
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
|
||||||
return valueCount / valuesPerBlock
|
return valueCount / valuesPerBlock
|
||||||
+ (valueCount %% valuesPerBlock == 0 ? 0 : 1);
|
+ (valueCount %% valuesPerBlock == 0 ? 0 : 1);
|
||||||
|
@ -111,16 +105,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
|
|
||||||
// bulk get
|
// bulk get
|
||||||
assert index %% valuesPerBlock == 0;
|
assert index %% valuesPerBlock == 0;
|
||||||
final long readMask = (1L << bitsPerValue) - 1;
|
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||||
final int startBlock = index / valuesPerBlock;
|
assert op.blocks() == 1;
|
||||||
final int endBlock = (index + len) / valuesPerBlock;
|
assert op.values() == valuesPerBlock;
|
||||||
final int diff = (endBlock - startBlock) * valuesPerBlock;
|
final int blockIndex = index / valuesPerBlock;
|
||||||
|
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||||
|
op.get(blocks, blockIndex, arr, off, nblocks);
|
||||||
|
final int diff = nblocks * valuesPerBlock;
|
||||||
index += diff; len -= diff;
|
index += diff; len -= diff;
|
||||||
for (int block = startBlock; block < endBlock; ++block) {
|
|
||||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
|
||||||
arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (index > originalIndex) {
|
if (index > originalIndex) {
|
||||||
// stay at the block boundary
|
// stay at the block boundary
|
||||||
|
@ -157,17 +149,14 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
|
|
||||||
// bulk set
|
// bulk set
|
||||||
assert index %% valuesPerBlock == 0;
|
assert index %% valuesPerBlock == 0;
|
||||||
final int startBlock = index / valuesPerBlock;
|
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||||
final int endBlock = (index + len) / valuesPerBlock;
|
assert op.blocks() == 1;
|
||||||
final int diff = (endBlock - startBlock) * valuesPerBlock;
|
assert op.values() == valuesPerBlock;
|
||||||
|
final int blockIndex = index / valuesPerBlock;
|
||||||
|
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||||
|
op.set(blocks, blockIndex, arr, off, nblocks);
|
||||||
|
final int diff = nblocks * valuesPerBlock;
|
||||||
index += diff; len -= diff;
|
index += diff; len -= diff;
|
||||||
for (int block = startBlock; block < endBlock; ++block) {
|
|
||||||
long next = 0L;
|
|
||||||
for (int i = 0; i < valuesPerBlock; ++i) {
|
|
||||||
next |= (arr[off++] << (i * bitsPerValue));
|
|
||||||
}
|
|
||||||
blocks[block] = next;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (index > originalIndex) {
|
if (index > originalIndex) {
|
||||||
// stay at the block boundary
|
// stay at the block boundary
|
||||||
|
@ -221,8 +210,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int getFormat() {
|
protected PackedInts.Format getFormat() {
|
||||||
return PackedInts.PACKED_SINGLE_BLOCK;
|
return PackedInts.Format.PACKED_SINGLE_BLOCK;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,161 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
HEADER="""// This file has been automatically generated, DO NOT EDIT
|
||||||
|
|
||||||
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
TYPES = {8: "byte", 16: "short"}
|
||||||
|
MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
|
||||||
|
CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
for bpv in TYPES.keys():
|
||||||
|
type
|
||||||
|
f = open("Packed%dThreeBlocks.java" %bpv, 'w')
|
||||||
|
f.write(HEADER)
|
||||||
|
f.write("""/**
|
||||||
|
* Packs integers into 3 %ss (%d bits per value).
|
||||||
|
* @lucene.internal
|
||||||
|
*/\n""" %(TYPES[bpv], bpv*3))
|
||||||
|
f.write("final class Packed%dThreeBlocks extends PackedInts.MutableImpl {\n" %bpv)
|
||||||
|
f.write(" final %s[] blocks;\n\n" %TYPES[bpv])
|
||||||
|
|
||||||
|
f.write(" public static final int MAX_SIZE = Integer.MAX_VALUE / 3;\n\n")
|
||||||
|
|
||||||
|
f.write(" Packed%dThreeBlocks(int valueCount) {\n" %bpv)
|
||||||
|
f.write(" super(valueCount, %d);\n" %(bpv*3))
|
||||||
|
f.write(" if (valueCount > MAX_SIZE) {\n")
|
||||||
|
f.write(" throw new ArrayIndexOutOfBoundsException(\"MAX_SIZE exceeded\");\n")
|
||||||
|
f.write(" }\n")
|
||||||
|
f.write(" blocks = new %s[valueCount * 3];\n" %TYPES[bpv])
|
||||||
|
f.write(" }\n\n")
|
||||||
|
|
||||||
|
f.write(" Packed%dThreeBlocks(DataInput in, int valueCount) throws IOException {\n" %bpv)
|
||||||
|
f.write(" this(valueCount);\n")
|
||||||
|
f.write(" for (int i = 0; i < 3 * valueCount; ++i) {\n")
|
||||||
|
f.write(" blocks[i] = in.read%s();\n" %TYPES[bpv].title())
|
||||||
|
f.write(" }\n")
|
||||||
|
f.write(" final int mod = blocks.length %% %d;\n" %(64 / bpv))
|
||||||
|
f.write(" if (mod != 0) {\n")
|
||||||
|
f.write(" for (int i = mod; i < %d; ++i) {\n" %(64 / bpv))
|
||||||
|
f.write(" in.read%s();\n" %TYPES[bpv].title())
|
||||||
|
f.write(" }\n")
|
||||||
|
f.write(" }\n")
|
||||||
|
f.write(" }\n")
|
||||||
|
|
||||||
|
f.write("""
|
||||||
|
@Override
|
||||||
|
public long get(int index) {
|
||||||
|
final int o = index * 3;
|
||||||
|
return (blocks[o]%s) << %d | (blocks[o+1]%s) << %d | (blocks[o+2]%s);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int gets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
|
||||||
|
arr[off++] = (blocks[i]%s) << %d | (blocks[i+1]%s) << %d | (blocks[i+2]%s);
|
||||||
|
}
|
||||||
|
return gets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void set(int index, long value) {
|
||||||
|
final int o = index * 3;
|
||||||
|
blocks[o] = %s(value >>> %d);
|
||||||
|
blocks[o+1] = %s(value >>> %d);
|
||||||
|
blocks[o+2] = %svalue;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int set(int index, long[] arr, int off, int len) {
|
||||||
|
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||||
|
assert index >= 0 && index < valueCount;
|
||||||
|
assert off + len <= arr.length;
|
||||||
|
|
||||||
|
final int sets = Math.min(valueCount - index, len);
|
||||||
|
for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
|
||||||
|
final long value = arr[i];
|
||||||
|
blocks[o++] = %s(value >>> %d);
|
||||||
|
blocks[o++] = %s(value >>> %d);
|
||||||
|
blocks[o++] = %svalue;
|
||||||
|
}
|
||||||
|
return sets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fill(int fromIndex, int toIndex, long val) {
|
||||||
|
final %s block1 = %s(val >>> %d);
|
||||||
|
final %s block2 = %s(val >>> %d);
|
||||||
|
final %s block3 = %sval;
|
||||||
|
for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
|
||||||
|
blocks[i] = block1;
|
||||||
|
blocks[i+1] = block2;
|
||||||
|
blocks[i+2] = block3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
Arrays.fill(blocks, %s0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return RamUsageEstimator.sizeOf(blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||||
|
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
""" %(MASKS[bpv], 2*bpv, MASKS[bpv], bpv, MASKS[bpv], MASKS[bpv], 2*bpv, MASKS[bpv], bpv, MASKS[bpv], CASTS[bpv], 2*bpv, CASTS[bpv], bpv, CASTS[bpv], CASTS[bpv],
|
||||||
|
2*bpv, CASTS[bpv], bpv, CASTS[bpv], TYPES[bpv], CASTS[bpv], 2*bpv, TYPES[bpv],
|
||||||
|
CASTS[bpv], bpv, TYPES[bpv], CASTS[bpv], CASTS[bpv]))
|
||||||
|
|
||||||
|
f.close()
|
|
@ -19,9 +19,16 @@
|
||||||
<head></head>
|
<head></head>
|
||||||
<body bgcolor="white">
|
<body bgcolor="white">
|
||||||
|
|
||||||
|
<p>Packed integer arrays and streams.</p>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
The packed package provides random access capable arrays of positive longs.
|
The packed package provides
|
||||||
The implementations provides different trade offs between memory usage and
|
<ul>
|
||||||
|
<li>sequential and random access capable arrays of positive longs,</li>
|
||||||
|
<li>routines for efficient serialization and deserialization of streams of packed integers.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
The implementations provide different trade-offs between memory usage and
|
||||||
access speed. The standard usage scenario is replacing large int or long
|
access speed. The standard usage scenario is replacing large int or long
|
||||||
arrays in order to reduce the memory footprint.
|
arrays in order to reduce the memory footprint.
|
||||||
</p><p>
|
</p><p>
|
||||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.store.*;
|
import org.apache.lucene.store.*;
|
||||||
|
import org.apache.lucene.util.LongsRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||||
|
@ -57,6 +58,9 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
for(int nbits=1;nbits<=64;nbits++) {
|
for(int nbits=1;nbits<=64;nbits++) {
|
||||||
final long maxValue = PackedInts.maxValue(nbits);
|
final long maxValue = PackedInts.maxValue(nbits);
|
||||||
final int valueCount = 100+random().nextInt(500);
|
final int valueCount = 100+random().nextInt(500);
|
||||||
|
final int bufferSize = random().nextBoolean()
|
||||||
|
? _TestUtil.nextInt(random(), 0, 48)
|
||||||
|
: _TestUtil.nextInt(random(), 0, 4096);
|
||||||
final Directory d = newDirectory();
|
final Directory d = newDirectory();
|
||||||
|
|
||||||
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
|
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
|
||||||
|
@ -79,10 +83,10 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
// ensure that finish() added the (valueCount-actualValueCount) missing values
|
// ensure that finish() added the (valueCount-actualValueCount) missing values
|
||||||
final long bytes;
|
final long bytes;
|
||||||
switch (w.getFormat()) {
|
switch (w.getFormat()) {
|
||||||
case PackedInts.PACKED:
|
case PACKED:
|
||||||
bytes = (long) Math.ceil((double) valueCount * w.bitsPerValue / 64) << 3;
|
bytes = (long) Math.ceil((double) valueCount * w.bitsPerValue / 64) << 3;
|
||||||
break;
|
break;
|
||||||
case PackedInts.PACKED_SINGLE_BLOCK:
|
case PACKED_SINGLE_BLOCK:
|
||||||
final int valuesPerBlock = 64 / w.bitsPerValue;
|
final int valuesPerBlock = 64 / w.bitsPerValue;
|
||||||
bytes = (long) Math.ceil((double) valueCount / valuesPerBlock) << 3;
|
bytes = (long) Math.ceil((double) valueCount / valuesPerBlock) << 3;
|
||||||
break;
|
break;
|
||||||
|
@ -97,7 +101,7 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
CodecUtil.checkHeader(in, PackedInts.CODEC_NAME, PackedInts.VERSION_START, PackedInts.VERSION_CURRENT); // codec header
|
CodecUtil.checkHeader(in, PackedInts.CODEC_NAME, PackedInts.VERSION_START, PackedInts.VERSION_CURRENT); // codec header
|
||||||
assertEquals(w.bitsPerValue, in.readVInt());
|
assertEquals(w.bitsPerValue, in.readVInt());
|
||||||
assertEquals(valueCount, in.readVInt());
|
assertEquals(valueCount, in.readVInt());
|
||||||
assertEquals(w.getFormat(), in.readVInt());
|
assertEquals(w.getFormat().getId(), in.readVInt());
|
||||||
assertEquals(startFp, in.getFilePointer());
|
assertEquals(startFp, in.getFilePointer());
|
||||||
in.close();
|
in.close();
|
||||||
}
|
}
|
||||||
|
@ -113,37 +117,34 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
in.close();
|
in.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
{ // test reader iterator next
|
{ // test reader iterator next
|
||||||
IndexInput in = d.openInput("out.bin", newIOContext(random()));
|
IndexInput in = d.openInput("out.bin", newIOContext(random()));
|
||||||
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in);
|
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
|
||||||
for(int i=0;i<valueCount;i++) {
|
for(int i=0;i<valueCount;i++) {
|
||||||
assertEquals("index=" + i + " valueCount="
|
assertEquals("index=" + i + " valueCount="
|
||||||
+ valueCount + " nbits=" + nbits + " for "
|
+ valueCount + " nbits=" + nbits + " for "
|
||||||
+ r.getClass().getSimpleName(), values[i], r.next());
|
+ r.getClass().getSimpleName(), values[i], r.next());
|
||||||
|
assertEquals(i, r.ord());
|
||||||
}
|
}
|
||||||
assertEquals(fp, in.getFilePointer());
|
assertEquals(fp, in.getFilePointer());
|
||||||
in.close();
|
in.close();
|
||||||
}
|
}
|
||||||
{ // test reader iterator next vs. advance
|
|
||||||
|
{ // test reader iterator bulk next
|
||||||
IndexInput in = d.openInput("out.bin", newIOContext(random()));
|
IndexInput in = d.openInput("out.bin", newIOContext(random()));
|
||||||
PackedInts.ReaderIterator intsEnum = PackedInts.getReaderIterator(in);
|
PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
|
||||||
for (int i = 0; i < valueCount; i +=
|
int i = 0;
|
||||||
1 + ((valueCount - i) <= 20 ? random().nextInt(valueCount - i)
|
while (i < valueCount) {
|
||||||
: random().nextInt(20))) {
|
final int count = _TestUtil.nextInt(random(), 1, 95);
|
||||||
final String msg = "index=" + i + " valueCount="
|
final LongsRef next = r.next(count);
|
||||||
+ valueCount + " nbits=" + nbits + " for "
|
for (int k = 0; k < next.length; ++k) {
|
||||||
+ intsEnum.getClass().getSimpleName();
|
assertEquals("index=" + i + " valueCount="
|
||||||
if (i - intsEnum.ord() == 1 && random().nextBoolean()) {
|
+ valueCount + " nbits=" + nbits + " for "
|
||||||
assertEquals(msg, values[i], intsEnum.next());
|
+ r.getClass().getSimpleName(), values[i + k], next.longs[next.offset + k]);
|
||||||
} else {
|
|
||||||
assertEquals(msg, values[i], intsEnum.advance(i));
|
|
||||||
}
|
}
|
||||||
assertEquals(msg, i, intsEnum.ord());
|
i += next.length;
|
||||||
}
|
}
|
||||||
if (intsEnum.ord() < valueCount - 1)
|
|
||||||
assertEquals(values[valueCount - 1], intsEnum
|
|
||||||
.advance(valueCount - 1));
|
|
||||||
assertEquals(valueCount - 1, intsEnum.ord());
|
|
||||||
assertEquals(fp, in.getFilePointer());
|
assertEquals(fp, in.getFilePointer());
|
||||||
in.close();
|
in.close();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue