diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/PackedDataInput.java b/lucene/core/src/java/org/apache/lucene/util/packed/PackedDataInput.java new file mode 100644 index 00000000000..87a4c570b3f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/packed/PackedDataInput.java @@ -0,0 +1,72 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.store.DataInput; + +/** + * A {@link DataInput} wrapper to read unaligned, variable-length packed + * integers. This API is much slower than the {@link PackedInts} fixed-length + * API but can be convenient to save space. + * @see PackedDataOutput + * @lucene.internal + */ +public final class PackedDataInput { + + final DataInput in; + long current; + int remainingBits; + + /** + * Create a new instance that wraps in. + */ + public PackedDataInput(DataInput in) { + this.in = in; + skipToNextByte(); + } + + /** + * Read the next long using exactly bitsPerValue bits. + */ + public long readLong(int bitsPerValue) throws IOException { + assert bitsPerValue > 0 && bitsPerValue <= 64 : bitsPerValue; + long r = 0; + while (bitsPerValue > 0) { + if (remainingBits == 0) { + current = in.readByte() & 0xFF; + remainingBits = 8; + } + final int bits = Math.min(bitsPerValue, remainingBits); + r = (r << bits) | ((current >>> (remainingBits - bits)) & ((1L << bits) - 1)); + bitsPerValue -= bits; + remainingBits -= bits; + } + return r; + } + + /** + * If there are pending bits (at most 7), they will be ignored and the next + * value will be read starting at the next byte. + */ + public void skipToNextByte() { + remainingBits = 0; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/PackedDataOutput.java b/lucene/core/src/java/org/apache/lucene/util/packed/PackedDataOutput.java new file mode 100644 index 00000000000..e750526a30a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/packed/PackedDataOutput.java @@ -0,0 +1,74 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.store.DataOutput; + +/** + * A {@link DataOutput} wrapper to write unaligned, variable-length packed + * integers. + * @see PackedDataInput + * @lucene.internal + */ +public final class PackedDataOutput { + + final DataOutput out; + long current; + int remainingBits; + + /** + * Create a new instance that wraps out. + */ + public PackedDataOutput(DataOutput out) { + this.out = out; + current = 0; + remainingBits = 8; + } + + /** + * Write a value using exactly bitsPerValue bits. + */ + public void writeLong(long value, int bitsPerValue) throws IOException { + assert bitsPerValue == 64 || (value >= 0 && value <= PackedInts.maxValue(bitsPerValue)); + while (bitsPerValue > 0) { + if (remainingBits == 0) { + out.writeByte((byte) current); + current = 0L; + remainingBits = 8; + } + final int bits = Math.min(remainingBits, bitsPerValue); + current = current | (((value >>> (bitsPerValue - bits)) & ((1L << bits) - 1)) << (remainingBits - bits)); + bitsPerValue -= bits; + remainingBits -= bits; + } + } + + /** + * Flush pending bits to the underlying {@link DataOutput}. + */ + public void flush() throws IOException { + if (remainingBits < 8) { + out.writeByte((byte) current); + } + remainingBits = 8; + current = 0L; + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java b/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java index d7d8b66db4d..e67bc770bb0 100644 --- a/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java +++ b/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java @@ -787,4 +787,47 @@ public class TestPackedInts extends LuceneTestCase { return true; } + public void testPackedInputOutput() throws IOException { + final long[] longs = new long[random().nextInt(8192)]; + final int[] bitsPerValues = new int[longs.length]; + final boolean[] skip = new boolean[longs.length]; + for (int i = 0; i < longs.length; ++i) { + final int bpv = RandomInts.randomIntBetween(random(), 1, 64); + bitsPerValues[i] = random().nextBoolean() ? bpv : _TestUtil.nextInt(random(), bpv, 64); + if (bpv == 64) { + longs[i] = random().nextLong(); + } else { + longs[i] = _TestUtil.nextLong(random(), 0, PackedInts.maxValue(bpv)); + } + skip[i] = rarely(); + } + + final Directory dir = newDirectory(); + final IndexOutput out = dir.createOutput("out.bin", IOContext.DEFAULT); + PackedDataOutput pout = new PackedDataOutput(out); + long totalBits = 0; + for (int i = 0; i < longs.length; ++i) { + pout.writeLong(longs[i], bitsPerValues[i]); + totalBits += bitsPerValues[i]; + if (skip[i]) { + pout.flush(); + totalBits = 8 * (long) Math.ceil((double) totalBits / 8); + } + } + pout.flush(); + assertEquals((long) Math.ceil((double) totalBits / 8), out.getFilePointer()); + out.close(); + final IndexInput in = dir.openInput("out.bin", IOContext.READONCE); + final PackedDataInput pin = new PackedDataInput(in); + for (int i = 0; i < longs.length; ++i) { + assertEquals("" + i, longs[i], pin.readLong(bitsPerValues[i])); + if (skip[i]) { + pin.skipToNextByte(); + } + } + assertEquals((long) Math.ceil((double) totalBits / 8), in.getFilePointer()); + in.close(); + dir.close(); + } + }