mirror of https://github.com/apache/lucene.git
LUCENE-4634: New streaming API to read/write ints with arbitrary numbers of bits per value.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1424827 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a5608fb81e
commit
048924a82e
|
@ -0,0 +1,72 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
|
||||
/**
|
||||
* A {@link DataInput} wrapper to read unaligned, variable-length packed
|
||||
* integers. This API is much slower than the {@link PackedInts} fixed-length
|
||||
* API but can be convenient to save space.
|
||||
* @see PackedDataOutput
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class PackedDataInput {
|
||||
|
||||
final DataInput in;
|
||||
long current;
|
||||
int remainingBits;
|
||||
|
||||
/**
|
||||
* Create a new instance that wraps <code>in</code>.
|
||||
*/
|
||||
public PackedDataInput(DataInput in) {
|
||||
this.in = in;
|
||||
skipToNextByte();
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the next long using exactly <code>bitsPerValue</code> bits.
|
||||
*/
|
||||
public long readLong(int bitsPerValue) throws IOException {
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64 : bitsPerValue;
|
||||
long r = 0;
|
||||
while (bitsPerValue > 0) {
|
||||
if (remainingBits == 0) {
|
||||
current = in.readByte() & 0xFF;
|
||||
remainingBits = 8;
|
||||
}
|
||||
final int bits = Math.min(bitsPerValue, remainingBits);
|
||||
r = (r << bits) | ((current >>> (remainingBits - bits)) & ((1L << bits) - 1));
|
||||
bitsPerValue -= bits;
|
||||
remainingBits -= bits;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* If there are pending bits (at most 7), they will be ignored and the next
|
||||
* value will be read starting at the next byte.
|
||||
*/
|
||||
public void skipToNextByte() {
|
||||
remainingBits = 0;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
||||
/**
|
||||
* A {@link DataOutput} wrapper to write unaligned, variable-length packed
|
||||
* integers.
|
||||
* @see PackedDataInput
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class PackedDataOutput {
|
||||
|
||||
final DataOutput out;
|
||||
long current;
|
||||
int remainingBits;
|
||||
|
||||
/**
|
||||
* Create a new instance that wraps <code>out</code>.
|
||||
*/
|
||||
public PackedDataOutput(DataOutput out) {
|
||||
this.out = out;
|
||||
current = 0;
|
||||
remainingBits = 8;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a value using exactly <code>bitsPerValue</code> bits.
|
||||
*/
|
||||
public void writeLong(long value, int bitsPerValue) throws IOException {
|
||||
assert bitsPerValue == 64 || (value >= 0 && value <= PackedInts.maxValue(bitsPerValue));
|
||||
while (bitsPerValue > 0) {
|
||||
if (remainingBits == 0) {
|
||||
out.writeByte((byte) current);
|
||||
current = 0L;
|
||||
remainingBits = 8;
|
||||
}
|
||||
final int bits = Math.min(remainingBits, bitsPerValue);
|
||||
current = current | (((value >>> (bitsPerValue - bits)) & ((1L << bits) - 1)) << (remainingBits - bits));
|
||||
bitsPerValue -= bits;
|
||||
remainingBits -= bits;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush pending bits to the underlying {@link DataOutput}.
|
||||
*/
|
||||
public void flush() throws IOException {
|
||||
if (remainingBits < 8) {
|
||||
out.writeByte((byte) current);
|
||||
}
|
||||
remainingBits = 8;
|
||||
current = 0L;
|
||||
}
|
||||
|
||||
}
|
|
@ -787,4 +787,47 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
return true;
|
||||
}
|
||||
|
||||
public void testPackedInputOutput() throws IOException {
|
||||
final long[] longs = new long[random().nextInt(8192)];
|
||||
final int[] bitsPerValues = new int[longs.length];
|
||||
final boolean[] skip = new boolean[longs.length];
|
||||
for (int i = 0; i < longs.length; ++i) {
|
||||
final int bpv = RandomInts.randomIntBetween(random(), 1, 64);
|
||||
bitsPerValues[i] = random().nextBoolean() ? bpv : _TestUtil.nextInt(random(), bpv, 64);
|
||||
if (bpv == 64) {
|
||||
longs[i] = random().nextLong();
|
||||
} else {
|
||||
longs[i] = _TestUtil.nextLong(random(), 0, PackedInts.maxValue(bpv));
|
||||
}
|
||||
skip[i] = rarely();
|
||||
}
|
||||
|
||||
final Directory dir = newDirectory();
|
||||
final IndexOutput out = dir.createOutput("out.bin", IOContext.DEFAULT);
|
||||
PackedDataOutput pout = new PackedDataOutput(out);
|
||||
long totalBits = 0;
|
||||
for (int i = 0; i < longs.length; ++i) {
|
||||
pout.writeLong(longs[i], bitsPerValues[i]);
|
||||
totalBits += bitsPerValues[i];
|
||||
if (skip[i]) {
|
||||
pout.flush();
|
||||
totalBits = 8 * (long) Math.ceil((double) totalBits / 8);
|
||||
}
|
||||
}
|
||||
pout.flush();
|
||||
assertEquals((long) Math.ceil((double) totalBits / 8), out.getFilePointer());
|
||||
out.close();
|
||||
final IndexInput in = dir.openInput("out.bin", IOContext.READONCE);
|
||||
final PackedDataInput pin = new PackedDataInput(in);
|
||||
for (int i = 0; i < longs.length; ++i) {
|
||||
assertEquals("" + i, longs[i], pin.readLong(bitsPerValues[i]));
|
||||
if (skip[i]) {
|
||||
pin.skipToNextByte();
|
||||
}
|
||||
}
|
||||
assertEquals((long) Math.ceil((double) totalBits / 8), in.getFilePointer());
|
||||
in.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue