LUCENE-4634: New streaming API to read/write ints with arbitrary numbers of bits per value.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1424827 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2012-12-21 09:42:04 +00:00
parent a5608fb81e
commit 048924a82e
3 changed files with 189 additions and 0 deletions

View File

@ -0,0 +1,72 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.DataInput;
/**
* A {@link DataInput} wrapper to read unaligned, variable-length packed
* integers. This API is much slower than the {@link PackedInts} fixed-length
* API but can be convenient to save space.
* @see PackedDataOutput
* @lucene.internal
*/
public final class PackedDataInput {
final DataInput in;
long current;
int remainingBits;
/**
* Create a new instance that wraps <code>in</code>.
*/
public PackedDataInput(DataInput in) {
this.in = in;
skipToNextByte();
}
/**
* Read the next long using exactly <code>bitsPerValue</code> bits.
*/
public long readLong(int bitsPerValue) throws IOException {
assert bitsPerValue > 0 && bitsPerValue <= 64 : bitsPerValue;
long r = 0;
while (bitsPerValue > 0) {
if (remainingBits == 0) {
current = in.readByte() & 0xFF;
remainingBits = 8;
}
final int bits = Math.min(bitsPerValue, remainingBits);
r = (r << bits) | ((current >>> (remainingBits - bits)) & ((1L << bits) - 1));
bitsPerValue -= bits;
remainingBits -= bits;
}
return r;
}
/**
* If there are pending bits (at most 7), they will be ignored and the next
* value will be read starting at the next byte.
*/
public void skipToNextByte() {
remainingBits = 0;
}
}

View File

@ -0,0 +1,74 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
/**
* A {@link DataOutput} wrapper to write unaligned, variable-length packed
* integers.
* @see PackedDataInput
* @lucene.internal
*/
public final class PackedDataOutput {
final DataOutput out;
long current;
int remainingBits;
/**
* Create a new instance that wraps <code>out</code>.
*/
public PackedDataOutput(DataOutput out) {
this.out = out;
current = 0;
remainingBits = 8;
}
/**
* Write a value using exactly <code>bitsPerValue</code> bits.
*/
public void writeLong(long value, int bitsPerValue) throws IOException {
assert bitsPerValue == 64 || (value >= 0 && value <= PackedInts.maxValue(bitsPerValue));
while (bitsPerValue > 0) {
if (remainingBits == 0) {
out.writeByte((byte) current);
current = 0L;
remainingBits = 8;
}
final int bits = Math.min(remainingBits, bitsPerValue);
current = current | (((value >>> (bitsPerValue - bits)) & ((1L << bits) - 1)) << (remainingBits - bits));
bitsPerValue -= bits;
remainingBits -= bits;
}
}
/**
* Flush pending bits to the underlying {@link DataOutput}.
*/
public void flush() throws IOException {
if (remainingBits < 8) {
out.writeByte((byte) current);
}
remainingBits = 8;
current = 0L;
}
}

View File

@ -787,4 +787,47 @@ public class TestPackedInts extends LuceneTestCase {
return true;
}
public void testPackedInputOutput() throws IOException {
final long[] longs = new long[random().nextInt(8192)];
final int[] bitsPerValues = new int[longs.length];
final boolean[] skip = new boolean[longs.length];
for (int i = 0; i < longs.length; ++i) {
final int bpv = RandomInts.randomIntBetween(random(), 1, 64);
bitsPerValues[i] = random().nextBoolean() ? bpv : _TestUtil.nextInt(random(), bpv, 64);
if (bpv == 64) {
longs[i] = random().nextLong();
} else {
longs[i] = _TestUtil.nextLong(random(), 0, PackedInts.maxValue(bpv));
}
skip[i] = rarely();
}
final Directory dir = newDirectory();
final IndexOutput out = dir.createOutput("out.bin", IOContext.DEFAULT);
PackedDataOutput pout = new PackedDataOutput(out);
long totalBits = 0;
for (int i = 0; i < longs.length; ++i) {
pout.writeLong(longs[i], bitsPerValues[i]);
totalBits += bitsPerValues[i];
if (skip[i]) {
pout.flush();
totalBits = 8 * (long) Math.ceil((double) totalBits / 8);
}
}
pout.flush();
assertEquals((long) Math.ceil((double) totalBits / 8), out.getFilePointer());
out.close();
final IndexInput in = dir.openInput("out.bin", IOContext.READONCE);
final PackedDataInput pin = new PackedDataInput(in);
for (int i = 0; i < longs.length; ++i) {
assertEquals("" + i, longs[i], pin.readLong(bitsPerValues[i]));
if (skip[i]) {
pin.skipToNextByte();
}
}
assertEquals((long) Math.ceil((double) totalBits / 8), in.getFilePointer());
in.close();
dir.close();
}
}