LUCENE-9907: Move PackedInts#getReaderNoHeader() to backwards codec

This commit is contained in:
Ignacio Vera 2021-04-20 09:09:38 +02:00 committed by GitHub
parent b0662c807c
commit e0436872c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 803 additions and 169 deletions

View File

@ -37,10 +37,7 @@ HEADER = """// This file has been automatically generated, DO NOT EDIT
*/
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
@ -221,15 +218,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
+ ",size=" + size() + ",blocks=" + blocks.length + ")";
}
public static Packed64SingleBlock create(DataInput in,
int valueCount, int bitsPerValue) throws IOException {
Packed64SingleBlock reader = create(valueCount, bitsPerValue);
for (int i = 0; i < reader.blocks.length; ++i) {
reader.blocks[i] = in.readLong();
}
return reader;
}
""" % (SUPPORTED_BITS_PER_VALUE[-1], ", ".join(map(str, SUPPORTED_BITS_PER_VALUE)))
FOOTER = "}"

View File

@ -299,6 +299,9 @@ Other
* LUCENE-9705: Make new versions of all index formats for the Lucene90 codec and move
the existing ones to the backwards codecs. (Julie Tibshirani, Ignacio Vera)
* LUCENE-9907: Remove dependency on PackedInts#getReader() from the current codecs and move the
method to backwards codec. (Ignacio Vera)
======================= Lucene 8.9.0 =======================

View File

@ -19,6 +19,7 @@ package org.apache.lucene.backward_codecs.lucene50.compressing;
import static org.apache.lucene.util.BitUtil.zigZagDecode;
import java.io.IOException;
import org.apache.lucene.backward_codecs.packed.LegacyPackedInts;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.IndexInput;
@ -74,7 +75,7 @@ final class LegacyFieldsIndexReader extends FieldsIndex {
"Corrupted bitsPerDocBase: " + bitsPerDocBase, fieldsIndexIn);
}
docBasesDeltas[blockCount] =
PackedInts.getReaderNoHeader(
LegacyPackedInts.getReaderNoHeader(
fieldsIndexIn,
PackedInts.Format.PACKED,
packedIntsVersion,
@ -90,7 +91,7 @@ final class LegacyFieldsIndexReader extends FieldsIndex {
"Corrupted bitsPerStartPointer: " + bitsPerStartPointer, fieldsIndexIn);
}
startPointersDeltas[blockCount] =
PackedInts.getReaderNoHeader(
LegacyPackedInts.getReaderNoHeader(
fieldsIndexIn,
PackedInts.Format.PACKED,
packedIntsVersion,

View File

@ -19,6 +19,7 @@ package org.apache.lucene.backward_codecs.lucene50.compressing;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.backward_codecs.packed.LegacyPackedInts;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.compressing.CompressionMode;
@ -403,12 +404,12 @@ public final class Lucene50CompressingTermVectorsReader extends TermVectorsReade
{
final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
final PackedInts.Reader allFieldNumOffs =
PackedInts.getReaderNoHeader(
LegacyPackedInts.getReaderNoHeader(
vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
switch (vectorsStream.readVInt()) {
case 0:
final PackedInts.Reader fieldFlags =
PackedInts.getReaderNoHeader(
LegacyPackedInts.getReaderNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
packedIntsVersion,
@ -425,7 +426,7 @@ public final class Lucene50CompressingTermVectorsReader extends TermVectorsReade
break;
case 1:
flags =
PackedInts.getReaderNoHeader(
LegacyPackedInts.getReaderNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
packedIntsVersion,
@ -446,7 +447,7 @@ public final class Lucene50CompressingTermVectorsReader extends TermVectorsReade
{
final int bitsRequired = vectorsStream.readVInt();
numTerms =
PackedInts.getReaderNoHeader(
LegacyPackedInts.getReaderNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
packedIntsVersion,

View File

@ -0,0 +1,164 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.packed;
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
/**
* Immutable version of {@code Packed64} which is constructed from am existing {@link DataInput}.
*/
class LegacyPacked64 extends PackedInts.Reader {
static final int BLOCK_SIZE = 64; // 32 = int, 64 = long
static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE
static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
/** Values are stores contiguously in the blocks array. */
private final long[] blocks;
/** A right-aligned mask of width BitsPerValue used by {@link #get(int)}. */
private final long maskRight;
/** Optimization: Saves one lookup in {@link #get(int)}. */
private final int bpvMinusBlockSize;
/** number of values */
protected final int valueCount;
/** bits per value. */
protected final int bitsPerValue;
/**
* Creates an array with content retrieved from the given DataInput.
*
* @param in a DataInput, positioned at the start of Packed64-content.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* @throws IOException if the values for the backing array could not be retrieved.
*/
public LegacyPacked64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue)
throws IOException {
this.valueCount = valueCount;
this.bitsPerValue = bitsPerValue;
final PackedInts.Format format = PackedInts.Format.PACKED;
final long byteCount =
format.byteCount(packedIntsVersion, valueCount, bitsPerValue); // to know how much to read
final int longCount =
format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue); // to size the array
blocks = new long[longCount];
// read as many longs as we can
for (int i = 0; i < byteCount / 8; ++i) {
blocks[i] = in.readLong();
}
final int remaining = (int) (byteCount % 8);
if (remaining != 0) {
// read the last bytes
long lastLong = 0;
for (int i = 0; i < remaining; ++i) {
lastLong |= (in.readByte() & 0xFFL) << (56 - i * 8);
}
blocks[blocks.length - 1] = lastLong;
}
maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
@Override
public final int size() {
return valueCount;
}
@Override
public long get(final int index) {
// The abstract index in a bit stream
final long majorBitPos = (long) index * bitsPerValue;
// The index in the backing long-array
final int elementPos = (int) (majorBitPos >>> BLOCK_BITS);
// The number of value-bits in the second long
final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
if (endBits <= 0) { // Single block
return (blocks[elementPos] >>> -endBits) & maskRight;
}
// Two blocks
return ((blocks[elementPos] << endBits) | (blocks[elementPos + 1] >>> (BLOCK_SIZE - endBits)))
& maskRight;
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
assert off + len <= arr.length;
final int originalIndex = index;
final PackedInts.Decoder decoder =
PackedInts.getDecoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % decoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < decoder.longValueCount() && len > 0; ++i) {
arr[off++] = get(index++);
--len;
}
if (len == 0) {
return index - originalIndex;
}
}
// bulk get
assert index % decoder.longValueCount() == 0;
int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / decoder.longValueCount();
decoder.decode(blocks, blockIndex, arr, off, iterations);
final int gotValues = iterations * decoder.longValueCount();
index += gotValues;
len -= gotValues;
assert len >= 0;
if (index > originalIndex) {
// stay at the block boundary
return index - originalIndex;
} else {
// no progress so far => already at a block boundary but no full block to get
assert index == originalIndex;
return super.get(index, arr, off, len);
}
}
@Override
public String toString() {
return "LegacyPacked64(bitsPerValue="
+ bitsPerValue
+ ",size="
+ size()
+ ",blocks="
+ blocks.length
+ ")";
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ 3 * Integer.BYTES // bpvMinusBlockSize,valueCount,bitsPerValue
+ Long.BYTES // maskRight
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // blocks ref
+ RamUsageEstimator.sizeOf(blocks);
}
}

View File

@ -0,0 +1,384 @@
// This file has been automatically generated, DO NOT EDIT
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.lucene.backward_codecs.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
/**
 * This class is similar to {@link LegacyPacked64} except that it trades space for speed by ensuring
 * that a single block needs to be read in order to read a value.
 *
 * <p>Each 64-bit block holds {@code 64 / bitsPerValue} values, stored from the least significant
 * bits upwards; any leftover high bits of a block are unused.
 */
abstract class LegacyPacked64SingleBlock extends PackedInts.Reader {

  /** Largest number of bits per value accepted by {@link #isSupported(int)}. */
  public static final int MAX_SUPPORTED_BITS_PER_VALUE = 32;

  /** Sorted list of the supported bit widths (sorted so that it can be binary-searched). */
  private static final int[] SUPPORTED_BITS_PER_VALUE =
      new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32};

  /** Returns whether a reader exists for the given number of bits per value. */
  public static boolean isSupported(int bitsPerValue) {
    return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
  }

  /** Number of blocks needed to hold {@code valueCount} values, rounding up. */
  private static int requiredCapacity(int valueCount, int valuesPerBlock) {
    return valueCount / valuesPerBlock + (valueCount % valuesPerBlock == 0 ? 0 : 1);
  }

  /** Backing storage; filled by {@link #create(DataInput, int, int)}. */
  final long[] blocks;

  /** number of values */
  protected final int valueCount;

  /** bits per value */
  protected final int bitsPerValue;

  /**
   * Allocates a zeroed backing array large enough for {@code valueCount} values.
   *
   * @param valueCount the number of values
   * @param bitsPerValue the number of bits per value, expected to be a supported width
   */
  LegacyPacked64SingleBlock(int valueCount, int bitsPerValue) {
    this.valueCount = valueCount;
    this.bitsPerValue = bitsPerValue;
    assert isSupported(bitsPerValue);
    final int valuesPerBlock = 64 / bitsPerValue;
    blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
  }

  @Override
  public final int size() {
    return valueCount;
  }

  @Override
  public long ramBytesUsed() {
    return RamUsageEstimator.alignObjectSize(
            RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
                + 2 * Integer.BYTES // valueCount,bitsPerValue
                + RamUsageEstimator.NUM_BYTES_OBJECT_REF) // blocks ref
        + RamUsageEstimator.sizeOf(blocks);
  }

  @Override
  public int get(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    len = Math.min(len, valueCount - index);
    assert off + len <= arr.length;

    final int originalIndex = index;

    // go to the next block boundary
    final int valuesPerBlock = 64 / bitsPerValue;
    final int offsetInBlock = index % valuesPerBlock;
    if (offsetInBlock != 0) {
      for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
        arr[off++] = get(index++);
        --len;
      }
      if (len == 0) {
        return index - originalIndex;
      }
    }

    // bulk get
    assert index % valuesPerBlock == 0;
    @SuppressWarnings("deprecation")
    final PackedInts.Decoder decoder =
        PackedInts.getDecoder(
            PackedInts.Format.PACKED_SINGLE_BLOCK, PackedInts.VERSION_CURRENT, bitsPerValue);
    assert decoder.longBlockCount() == 1;
    assert decoder.longValueCount() == valuesPerBlock;
    final int blockIndex = index / valuesPerBlock;
    // only whole blocks are decoded in bulk
    final int nblocks = (index + len) / valuesPerBlock - blockIndex;
    decoder.decode(blocks, blockIndex, arr, off, nblocks);
    final int diff = nblocks * valuesPerBlock;
    index += diff;
    len -= diff;

    if (index > originalIndex) {
      // stay at the block boundary
      return index - originalIndex;
    } else {
      // no progress so far => already at a block boundary but no full block to
      // get
      assert index == originalIndex;
      return super.get(index, arr, off, len);
    }
  }

  @Override
  public String toString() {
    return getClass().getSimpleName()
        + "(bitsPerValue="
        + bitsPerValue
        + ",size="
        + size()
        + ",blocks="
        + blocks.length
        + ")";
  }

  /**
   * Creates a reader and fills every one of its blocks with a long read from the given input.
   *
   * @param in the input to read the blocks from
   * @param valueCount the number of values
   * @param bitsPerValue the number of bits per value
   * @return a reader populated from {@code in}
   * @throws IOException if a block could not be read
   * @throws IllegalArgumentException if {@code bitsPerValue} is not a supported width
   */
  public static LegacyPacked64SingleBlock create(DataInput in, int valueCount, int bitsPerValue)
      throws IOException {
    LegacyPacked64SingleBlock reader = create(valueCount, bitsPerValue);
    for (int i = 0; i < reader.blocks.length; ++i) {
      reader.blocks[i] = in.readLong();
    }
    return reader;
  }

  /**
   * Instantiates the specialized (still empty) reader for the given width.
   *
   * @throws IllegalArgumentException if {@code bitsPerValue} is not a supported width
   */
  private static LegacyPacked64SingleBlock create(int valueCount, int bitsPerValue) {
    switch (bitsPerValue) {
      case 1:
        return new Packed64SingleBlock1(valueCount);
      case 2:
        return new Packed64SingleBlock2(valueCount);
      case 3:
        return new Packed64SingleBlock3(valueCount);
      case 4:
        return new Packed64SingleBlock4(valueCount);
      case 5:
        return new Packed64SingleBlock5(valueCount);
      case 6:
        return new Packed64SingleBlock6(valueCount);
      case 7:
        return new Packed64SingleBlock7(valueCount);
      case 8:
        return new Packed64SingleBlock8(valueCount);
      case 9:
        return new Packed64SingleBlock9(valueCount);
      case 10:
        return new Packed64SingleBlock10(valueCount);
      case 12:
        return new Packed64SingleBlock12(valueCount);
      case 16:
        return new Packed64SingleBlock16(valueCount);
      case 21:
        return new Packed64SingleBlock21(valueCount);
      case 32:
        return new Packed64SingleBlock32(valueCount);
      default:
        // report the width that was actually requested, not a fixed constant
        throw new IllegalArgumentException(
            "Unsupported number of bits per value: " + bitsPerValue);
    }
  }

  /** Reader for 1-bit values: 64 values per block. */
  static class Packed64SingleBlock1 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock1(int valueCount) {
      super(valueCount, 1);
    }

    @Override
    public long get(int index) {
      final int o = index >>> 6; // block index
      final int b = index & 63; // position inside the block
      final int shift = b << 0;
      return (blocks[o] >>> shift) & 1L;
    }
  }

  /** Reader for 2-bit values: 32 values per block. */
  static class Packed64SingleBlock2 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock2(int valueCount) {
      super(valueCount, 2);
    }

    @Override
    public long get(int index) {
      final int o = index >>> 5; // block index
      final int b = index & 31; // position inside the block
      final int shift = b << 1;
      return (blocks[o] >>> shift) & 3L;
    }
  }

  /** Reader for 3-bit values: 21 values per block. */
  static class Packed64SingleBlock3 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock3(int valueCount) {
      super(valueCount, 3);
    }

    @Override
    public long get(int index) {
      final int o = index / 21; // block index
      final int b = index % 21; // position inside the block
      final int shift = b * 3;
      return (blocks[o] >>> shift) & 7L;
    }
  }

  /** Reader for 4-bit values: 16 values per block. */
  static class Packed64SingleBlock4 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock4(int valueCount) {
      super(valueCount, 4);
    }

    @Override
    public long get(int index) {
      final int o = index >>> 4; // block index
      final int b = index & 15; // position inside the block
      final int shift = b << 2;
      return (blocks[o] >>> shift) & 15L;
    }
  }

  /** Reader for 5-bit values: 12 values per block. */
  static class Packed64SingleBlock5 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock5(int valueCount) {
      super(valueCount, 5);
    }

    @Override
    public long get(int index) {
      final int o = index / 12; // block index
      final int b = index % 12; // position inside the block
      final int shift = b * 5;
      return (blocks[o] >>> shift) & 31L;
    }
  }

  /** Reader for 6-bit values: 10 values per block. */
  static class Packed64SingleBlock6 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock6(int valueCount) {
      super(valueCount, 6);
    }

    @Override
    public long get(int index) {
      final int o = index / 10; // block index
      final int b = index % 10; // position inside the block
      final int shift = b * 6;
      return (blocks[o] >>> shift) & 63L;
    }
  }

  /** Reader for 7-bit values: 9 values per block. */
  static class Packed64SingleBlock7 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock7(int valueCount) {
      super(valueCount, 7);
    }

    @Override
    public long get(int index) {
      final int o = index / 9; // block index
      final int b = index % 9; // position inside the block
      final int shift = b * 7;
      return (blocks[o] >>> shift) & 127L;
    }
  }

  /** Reader for 8-bit values: 8 values per block. */
  static class Packed64SingleBlock8 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock8(int valueCount) {
      super(valueCount, 8);
    }

    @Override
    public long get(int index) {
      final int o = index >>> 3; // block index
      final int b = index & 7; // position inside the block
      final int shift = b << 3;
      return (blocks[o] >>> shift) & 255L;
    }
  }

  /** Reader for 9-bit values: 7 values per block. */
  static class Packed64SingleBlock9 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock9(int valueCount) {
      super(valueCount, 9);
    }

    @Override
    public long get(int index) {
      final int o = index / 7; // block index
      final int b = index % 7; // position inside the block
      final int shift = b * 9;
      return (blocks[o] >>> shift) & 511L;
    }
  }

  /** Reader for 10-bit values: 6 values per block. */
  static class Packed64SingleBlock10 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock10(int valueCount) {
      super(valueCount, 10);
    }

    @Override
    public long get(int index) {
      final int o = index / 6; // block index
      final int b = index % 6; // position inside the block
      final int shift = b * 10;
      return (blocks[o] >>> shift) & 1023L;
    }
  }

  /** Reader for 12-bit values: 5 values per block. */
  static class Packed64SingleBlock12 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock12(int valueCount) {
      super(valueCount, 12);
    }

    @Override
    public long get(int index) {
      final int o = index / 5; // block index
      final int b = index % 5; // position inside the block
      final int shift = b * 12;
      return (blocks[o] >>> shift) & 4095L;
    }
  }

  /** Reader for 16-bit values: 4 values per block. */
  static class Packed64SingleBlock16 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock16(int valueCount) {
      super(valueCount, 16);
    }

    @Override
    public long get(int index) {
      final int o = index >>> 2; // block index
      final int b = index & 3; // position inside the block
      final int shift = b << 4;
      return (blocks[o] >>> shift) & 65535L;
    }
  }

  /** Reader for 21-bit values: 3 values per block. */
  static class Packed64SingleBlock21 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock21(int valueCount) {
      super(valueCount, 21);
    }

    @Override
    public long get(int index) {
      final int o = index / 3; // block index
      final int b = index % 3; // position inside the block
      final int shift = b * 21;
      return (blocks[o] >>> shift) & 2097151L;
    }
  }

  /** Reader for 32-bit values: 2 values per block. */
  static class Packed64SingleBlock32 extends LegacyPacked64SingleBlock {
    Packed64SingleBlock32(int valueCount) {
      super(valueCount, 32);
    }

    @Override
    public long get(int index) {
      final int o = index >>> 1; // block index
      final int b = index & 1; // position inside the block
      final int shift = b << 5;
      return (blocks[o] >>> shift) & 4294967295L;
    }
  }
}

View File

@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.packed;
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.packed.PackedInts;
/**
 * Legacy PackedInts operations.
 *
 * @lucene.internal
 */
public final class LegacyPackedInts {

  private LegacyPackedInts() {
    // no instances
  }

  /**
   * Expert: Restore a {@code PackedInts.Reader} from a stream without reading metadata at the
   * beginning of the stream. This method is useful to restore data from streams which have been
   * created using {@link PackedInts#getWriterNoHeader(DataOutput, PackedInts.Format, int, int,
   * int)}.
   *
   * <p>The version is validated with {@link PackedInts#checkVersion(int)} before anything is read
   * from the stream.
   *
   * @param in the stream to read data from, positioned at the beginning of the packed values
   * @param format the format used to serialize
   * @param version the version used to serialize the data
   * @param valueCount how many values the stream holds
   * @param bitsPerValue the number of bits per value
   * @return a Reader
   * @throws IOException If there is a low-level I/O error
   * @see PackedInts#getWriterNoHeader(DataOutput, PackedInts.Format, int, int, int)
   * @lucene.internal
   */
  public static PackedInts.Reader getReaderNoHeader(
      DataInput in, PackedInts.Format format, int version, int valueCount, int bitsPerValue)
      throws IOException {
    PackedInts.checkVersion(version);
    switch (format) {
      case PACKED_SINGLE_BLOCK:
        return LegacyPacked64SingleBlock.create(in, valueCount, bitsPerValue);
      case PACKED:
        return new LegacyPacked64(version, in, valueCount, bitsPerValue);
      default:
        // this is a reader factory, so name the reader in the diagnostic
        throw new AssertionError("Unknown Reader format: " + format);
    }
  }
}

View File

@ -0,0 +1,18 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Legacy PackedInts methods */
package org.apache.lucene.backward_codecs.packed;

View File

@ -0,0 +1,161 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.packed;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedInts.Reader;
/**
 * Tests for {@link LegacyPackedInts#getReaderNoHeader}: data is written with the current {@link
 * PackedInts} writer and read back through the legacy reader.
 *
 * <p>Directories, outputs and inputs are managed with try-with-resources so that a failing
 * assertion does not leave them open.
 */
public class TestLegacyPackedInts extends LuceneTestCase {

  /** Writes random values for every bit width and checks values, file pointer and RAM usage. */
  public void testPackedInts() throws IOException {
    int num = atLeast(3);
    for (int iter = 0; iter < num; iter++) {
      for (int nbits = 1; nbits <= 64; nbits++) {
        final long maxValue = PackedInts.maxValue(nbits);
        final int valueCount = TestUtil.nextInt(random(), 1, 600);
        try (Directory d = newDirectory()) {
          final long[] values = new long[valueCount];
          final long fp;
          try (IndexOutput out = d.createOutput("out.bin", newIOContext(random()))) {
            final int mem = random().nextInt(2 * PackedInts.DEFAULT_BUFFER_SIZE);
            PackedInts.Writer w =
                PackedInts.getWriterNoHeader(
                    out, PackedInts.Format.PACKED, valueCount, nbits, mem);
            final long startFp = out.getFilePointer();

            // sometimes add fewer values than announced; the rest of the array stays 0
            final int actualValueCount =
                random().nextBoolean() ? valueCount : TestUtil.nextInt(random(), 0, valueCount);
            for (int i = 0; i < actualValueCount; i++) {
              if (nbits == 64) {
                values[i] = random().nextLong();
              } else {
                values[i] = TestUtil.nextLong(random(), 0, maxValue);
              }
              w.add(values[i]);
            }
            w.finish();
            fp = out.getFilePointer();

            // ensure that finish() added the (valueCount-actualValueCount) missing values
            final long bytes =
                PackedInts.Format.PACKED.byteCount(
                    PackedInts.VERSION_CURRENT, valueCount, w.bitsPerValue());
            assertEquals(bytes, fp - startFp);
          }

          // test reader
          try (IndexInput in = d.openInput("out.bin", newIOContext(random()))) {
            PackedInts.Reader r =
                LegacyPackedInts.getReaderNoHeader(
                    in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, valueCount, nbits);
            assertEquals(fp, in.getFilePointer());
            for (int i = 0; i < valueCount; i++) {
              assertEquals(
                  "index="
                      + i
                      + " valueCount="
                      + valueCount
                      + " nbits="
                      + nbits
                      + " for "
                      + r.getClass().getSimpleName(),
                  values[i],
                  r.get(i));
            }

            final long expectedBytesUsed = RamUsageTester.sizeOf(r);
            final long computedBytesUsed = r.ramBytesUsed();
            assertEquals(
                r.getClass() + "expected " + expectedBytesUsed + ", got: " + computedBytesUsed,
                expectedBytesUsed,
                computedBytesUsed);
          }
        }
      }
    }
  }

  /**
   * Checks that, for every version, supported format and bit width, creating a reader leaves the
   * input positioned exactly {@code byteCount} bytes after the start.
   */
  public void testEndPointer() throws IOException {
    try (Directory dir = newDirectory()) {
      final int valueCount = RandomNumbers.randomIntBetween(random(), 1, 1000);
      try (IndexOutput out = dir.createOutput("tests.bin", newIOContext(random()))) {
        for (int i = 0; i < valueCount; ++i) {
          out.writeLong(0);
        }
      }

      try (IndexInput in = dir.openInput("tests.bin", newIOContext(random()))) {
        for (int version = PackedInts.VERSION_START;
            version <= PackedInts.VERSION_CURRENT;
            ++version) {
          for (int bpv = 1; bpv <= 64; ++bpv) {
            for (PackedInts.Format format : PackedInts.Format.values()) {
              if (!format.isSupported(bpv)) {
                continue;
              }
              final long byteCount = format.byteCount(version, valueCount, bpv);
              String msg =
                  "format="
                      + format
                      + ",version="
                      + version
                      + ",valueCount="
                      + valueCount
                      + ",bpv="
                      + bpv;

              // test reader
              in.seek(0L);
              LegacyPackedInts.getReaderNoHeader(in, format, version, valueCount, bpv);
              assertEquals(msg, byteCount, in.getFilePointer());
            }
          }
        }
      }
    }
  }

  /** Round-trips a single value for every bit width. */
  public void testSingleValue() throws Exception {
    for (int bitsPerValue = 1; bitsPerValue <= 64; ++bitsPerValue) {
      try (Directory dir = newDirectory()) {
        final long value = 17L & PackedInts.maxValue(bitsPerValue);
        final long end;
        final String writerImpl;
        try (IndexOutput out = dir.createOutput("out", newIOContext(random()))) {
          PackedInts.Writer w =
              PackedInts.getWriterNoHeader(
                  out, PackedInts.Format.PACKED, 1, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
          w.add(value);
          w.finish();
          end = out.getFilePointer();
          writerImpl = w.getClass().getSimpleName();
        }

        try (IndexInput in = dir.openInput("out", newIOContext(random()))) {
          Reader reader =
              LegacyPackedInts.getReaderNoHeader(
                  in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, 1, bitsPerValue);
          String msg = "Impl=" + writerImpl + ", bitsPerValue=" + bitsPerValue;
          assertEquals(msg, 1, reader.size());
          assertEquals(msg, value, reader.get(0));
          assertEquals(msg, end, in.getFilePointer());
        }
      }
    }
  }
}

View File

@ -26,6 +26,6 @@
"lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked8.java": "bc5124047b26fc0be147db5bc855be038d306f65",
"lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked9.java": "1121f69ea6d830ab6f4bd2f51d017b792c17d1b1",
"lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPackedSingleBlock.java": "36984601502fcc812eb9d9a845fa10774e575653",
"lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java": "2703943d7980188a3da355490e7b72918910b369",
"lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java": "2680e0a7c7207ddf615f50fd22465c809904ac42",
"property:source": "https://github.com/jpbarrette/moman/archive/497c90e34e412b6494db6dabf0d95db8034bd325.zip"
}

View File

@ -16,9 +16,7 @@
*/
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
@ -63,40 +61,6 @@ class Packed64 extends PackedInts.MutableImpl {
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
/**
* Creates an array with content retrieved from the given DataInput.
*
* @param in a DataInput, positioned at the start of Packed64-content.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* @throws java.io.IOException if the values for the backing array could not be retrieved.
*/
public Packed64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue)
throws IOException {
super(valueCount, bitsPerValue);
final PackedInts.Format format = PackedInts.Format.PACKED;
final long byteCount =
format.byteCount(packedIntsVersion, valueCount, bitsPerValue); // to know how much to read
final int longCount =
format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue); // to size the array
blocks = new long[longCount];
// read as many longs as we can
for (int i = 0; i < byteCount / 8; ++i) {
blocks[i] = in.readLong();
}
final int remaining = (int) (byteCount % 8);
if (remaining != 0) {
// read the last bytes
long lastLong = 0;
for (int i = 0; i < remaining; ++i) {
lastLong |= (in.readByte() & 0xFFL) << (56 - i * 8);
}
blocks[blocks.length - 1] = lastLong;
}
maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
/**
* @param index the position of the value.
* @return the value at the given index.

View File

@ -18,9 +18,7 @@
*/
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
@ -209,15 +207,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
+ ")";
}
public static Packed64SingleBlock create(DataInput in, int valueCount, int bitsPerValue)
throws IOException {
Packed64SingleBlock reader = create(valueCount, bitsPerValue);
for (int i = 0; i < reader.blocks.length; ++i) {
reader.blocks[i] = in.readLong();
}
return reader;
}
public static Packed64SingleBlock create(int valueCount, int bitsPerValue) {
switch (bitsPerValue) {
case 1:

View File

@ -658,35 +658,6 @@ public class PackedInts {
return BulkOperation.of(format, bitsPerValue);
}
/**
* Expert: Restore a {@link Reader} from a stream without reading metadata at the beginning of the
* stream. This method is useful to restore data from streams which have been created using {@link
* PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
*
* @param in the stream to read data from, positioned at the beginning of the packed values
* @param format the format used to serialize
* @param version the version used to serialize the data
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @return a Reader
* @throws IOException If there is a low-level I/O error
* @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
* @lucene.internal
*/
public static Reader getReaderNoHeader(
DataInput in, Format format, int version, int valueCount, int bitsPerValue)
throws IOException {
checkVersion(version);
switch (format) {
case PACKED_SINGLE_BLOCK:
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
case PACKED:
return new Packed64(version, in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
}
/**
* Expert: Restore a {@link ReaderIterator} from a stream without reading metadata at the
* beginning of the stream. This method is useful to restore data from streams which have been
@ -788,7 +759,6 @@ public class PackedInts {
* @param mem how much memory (in bytes) can be used to speed up serialization
* @return a Writer
* @see PackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int)
* @see PackedInts#getReaderNoHeader(DataInput, Format, int, int, int)
* @lucene.internal
*/
public static Writer getWriterNoHeader(

View File

@ -37,10 +37,7 @@ HEADER = """// This file has been automatically generated, DO NOT EDIT
*/
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
@ -221,15 +218,6 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
+ ",size=" + size() + ",blocks=" + blocks.length + ")";
}
public static Packed64SingleBlock create(DataInput in,
int valueCount, int bitsPerValue) throws IOException {
Packed64SingleBlock reader = create(valueCount, bitsPerValue);
for (int i = 0; i < reader.blocks.length; ++i) {
reader.blocks[i] = in.readLong();
}
return reader;
}
""" % (SUPPORTED_BITS_PER_VALUE[-1], ", ".join(map(str, SUPPORTED_BITS_PER_VALUE)))
FOOTER = "}"

View File

@ -126,35 +126,6 @@ public class TestPackedInts extends LuceneTestCase {
w.getFormat().byteCount(PackedInts.VERSION_CURRENT, valueCount, w.bitsPerValue);
assertEquals(bytes, fp - startFp);
{ // test reader
IndexInput in = d.openInput("out.bin", newIOContext(random()));
PackedInts.Reader r =
PackedInts.getReaderNoHeader(
in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, valueCount, nbits);
assertEquals(fp, in.getFilePointer());
for (int i = 0; i < valueCount; i++) {
assertEquals(
"index="
+ i
+ " valueCount="
+ valueCount
+ " nbits="
+ nbits
+ " for "
+ r.getClass().getSimpleName(),
values[i],
r.get(i));
}
in.close();
final long expectedBytesUsed = RamUsageTester.sizeOf(r);
final long computedBytesUsed = r.ramBytesUsed();
assertEquals(
r.getClass() + "expected " + expectedBytesUsed + ", got: " + computedBytesUsed,
expectedBytesUsed,
computedBytesUsed);
}
{ // test reader iterator next
IndexInput in = d.openInput("out.bin", newIOContext(random()));
PackedInts.ReaderIterator r =
@ -260,11 +231,6 @@ public class TestPackedInts extends LuceneTestCase {
it.next();
}
assertEquals(msg, byteCount, in.getFilePointer());
// test reader
in.seek(0L);
PackedInts.getReaderNoHeader(in, format, version, valueCount, bpv);
assertEquals(msg, byteCount, in.getFilePointer());
}
}
}
@ -442,33 +408,6 @@ public class TestPackedInts extends LuceneTestCase {
}
}
public void testSingleValue() throws Exception {
for (int bitsPerValue = 1; bitsPerValue <= 64; ++bitsPerValue) {
Directory dir = newDirectory();
IndexOutput out = dir.createOutput("out", newIOContext(random()));
PackedInts.Writer w =
PackedInts.getWriterNoHeader(
out, PackedInts.Format.PACKED, 1, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
long value = 17L & PackedInts.maxValue(bitsPerValue);
w.add(value);
w.finish();
final long end = out.getFilePointer();
out.close();
IndexInput in = dir.openInput("out", newIOContext(random()));
Reader reader =
PackedInts.getReaderNoHeader(
in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, 1, bitsPerValue);
String msg = "Impl=" + w.getClass().getSimpleName() + ", bitsPerValue=" + bitsPerValue;
assertEquals(msg, 1, reader.size());
assertEquals(msg, value, reader.get(0));
assertEquals(msg, end, in.getFilePointer());
in.close();
dir.close();
}
}
public void testSecondaryBlockChange() {
PackedInts.Mutable mutable = new Packed64(26, 5);
mutable.set(24, 31);