From 90918ad2d604f841c3bac1c5c5beb07cbfe5e750 Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Thu, 9 Aug 2012 12:52:24 +0000
Subject: [PATCH] LUCENE-3892: fix constant names, cache encoded sizes instead
 of formats and move readVIntBlock to the postings reader.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1371184 13f79535-47bb-0310-9956-ffa450edef68
---
 .../BlockPackedPostingsReader.java            | 60 +++++++++----
 .../BlockPackedPostingsWriter.java            | 18 ++--
 .../lucene/codecs/blockpacked/ForUtil.java    | 87 +++++++------
 .../codecs/blockpacked/TestForUtil.java       | 12 +--
 4 files changed, 86 insertions(+), 91 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
index 72fc8aaa183..e5493a37e87 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
@@ -18,8 +18,8 @@ package org.apache.lucene.codecs.blockpacked;
  */
 
 import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -122,6 +122,28 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
     }
   }
 
+  /**
+   * Read values that have been written using variable-length encoding instead of bit-packing.
+   */
+  private static void readVIntBlock(IndexInput docIn, int[] docBuffer,
+      int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
+    if (indexHasFreq) {
+      for(int i=0;i<num;i++) {
+        final int code = docIn.readVInt();
+        docBuffer[i] = code >>> 1;
+        if ((code & 1) != 0) {
+          freqBuffer[i] = 1;
+        } else {
+          freqBuffer[i] = docIn.readVInt();
+        }
+      }
+    } else {
+      for(int i=0;i<num;i++) {
+        docBuffer[i] = docIn.readVInt();
+      }
+    }
+  }
+
       indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       indexHasPayloads = fieldInfo.hasPayloads();
-      encoded = new byte[MIN_ENCODED_SIZE];
+      encoded = new byte[MAX_ENCODED_SIZE];
     }
 
     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -404,7 +426,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
         if (DEBUG) {
           System.out.println(" fill last vInt block from fp=" + docIn.getFilePointer());
         }
-        ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
+        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
       }
       docBufferUpto = 0;
     }
@@ -548,9 +570,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
 
     private final byte[] encoded;
 
-    private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
-    private final int[] freqBuffer = new int[MIN_DATA_SIZE];
-    private final int[] posDeltaBuffer = new int[MIN_DATA_SIZE];
+    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
+    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
 
     private int docBufferUpto;
     private int posBufferUpto;
@@ -609,7 +631,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
       this.startDocIn = BlockPackedPostingsReader.this.docIn;
       this.docIn = (IndexInput) startDocIn.clone();
       this.posIn = (IndexInput) BlockPackedPostingsReader.this.posIn.clone();
-      encoded = new byte[MIN_ENCODED_SIZE];
+      encoded = new byte[MAX_ENCODED_SIZE];
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       indexHasPayloads = fieldInfo.hasPayloads();
     }
@@ -678,7 +700,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
         if (DEBUG) {
          System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
         }
-        ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
+        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
       }
       docBufferUpto = 0;
     }
@@ -952,9 +974,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
 
     private final byte[] encoded;
 
-    private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
-    private final int[] freqBuffer = new int[MIN_DATA_SIZE];
-    private final int[] posDeltaBuffer = new int[MIN_DATA_SIZE];
+    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
+    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
 
     private final int[] payloadLengthBuffer;
     private final int[] offsetStartDeltaBuffer;
@@ -1032,11 +1054,11 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
       this.docIn = (IndexInput) startDocIn.clone();
       this.posIn = (IndexInput) BlockPackedPostingsReader.this.posIn.clone();
       this.payIn = (IndexInput) BlockPackedPostingsReader.this.payIn.clone();
-      encoded = new byte[MIN_ENCODED_SIZE];
+      encoded = new byte[MAX_ENCODED_SIZE];
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       if (indexHasOffsets) {
-        offsetStartDeltaBuffer = new int[MIN_DATA_SIZE];
-        offsetLengthBuffer = new int[MIN_DATA_SIZE];
+        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
+        offsetLengthBuffer = new int[MAX_DATA_SIZE];
       } else {
         offsetStartDeltaBuffer = null;
         offsetLengthBuffer = null;
@@ -1046,7 +1068,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
 
       indexHasPayloads = fieldInfo.hasPayloads();
       if (indexHasPayloads) {
-        payloadLengthBuffer = new int[MIN_DATA_SIZE];
+        payloadLengthBuffer = new int[MAX_DATA_SIZE];
         payloadBytes = new byte[128];
         payload = new BytesRef();
       } else {
@@ -1120,7 +1142,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
         if (DEBUG) {
          System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
         }
-        ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
+        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
       }
       docBufferUpto = 0;
     }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java
index c83535d170f..fe716901c4a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java
@@ -19,8 +19,8 @@ package org.apache.lucene.codecs.blockpacked;
 
 import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
 import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsReader.DEBUG;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -125,22 +125,22 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
     CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
     forUtil = new ForUtil(acceptableOverheadRatio, docOut);
     if (state.fieldInfos.hasProx()) {
-      posDeltaBuffer = new int[MIN_DATA_SIZE];
+      posDeltaBuffer = new int[MAX_DATA_SIZE];
       posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPackedPostingsFormat.POS_EXTENSION),
                                             state.context);
       CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
 
       if (state.fieldInfos.hasPayloads()) {
         payloadBytes = new byte[128];
-        payloadLengthBuffer = new int[MIN_DATA_SIZE];
+        payloadLengthBuffer = new int[MAX_DATA_SIZE];
       } else {
         payloadBytes = null;
         payloadLengthBuffer = null;
       }
 
       if (state.fieldInfos.hasOffsets()) {
-        offsetStartDeltaBuffer = new int[MIN_DATA_SIZE];
-        offsetLengthBuffer = new int[MIN_DATA_SIZE];
+        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
+        offsetLengthBuffer = new int[MAX_DATA_SIZE];
       } else {
         offsetStartDeltaBuffer = null;
         offsetLengthBuffer = null;
@@ -167,8 +167,8 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
       }
     }
 
-    docDeltaBuffer = new int[MIN_DATA_SIZE];
-    freqBuffer = new int[MIN_DATA_SIZE];
+    docDeltaBuffer = new int[MAX_DATA_SIZE];
+    freqBuffer = new int[MAX_DATA_SIZE];
 
     // nocommit should we try skipping every 2/4 blocks...?
     skipWriter = new BlockPackedSkipWriter(maxSkipLevels,
@@ -178,7 +178,7 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
                                            posOut,
                                            payOut);
 
-    encoded = new byte[MIN_ENCODED_SIZE];
+    encoded = new byte[MAX_ENCODED_SIZE];
   }
 
   public BlockPackedPostingsWriter(SegmentWriteState state) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java
index 73fab01442c..cf74672aa83 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java
@@ -26,6 +26,7 @@ import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedInts.Decoder;
 import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
 
 /**
@@ -41,14 +42,18 @@ final class ForUtil {
   private static final int PACKED_INTS_VERSION = 0; // nocommit: encode in the stream?
 
   /**
-   * Minimum length of the buffer that holds encoded bytes.
+   * Upper limit of the number of bytes that might be required to store
+   * BLOCK_SIZE encoded values.
    */
-  static final int MIN_ENCODED_SIZE = BLOCK_SIZE * 4;
+  static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4;
 
   /**
-   * Minimum length of the buffer that holds data.
+   * Upper limit of the number of values that might be decoded in a single call to
+   * {@link #readBlock(IndexInput, byte[], int[])}. Although values after
+   * BLOCK_SIZE are garbage, it is necessary to allocate value buffers
+   * whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s.
    */
-  static final int MIN_DATA_SIZE;
+  static final int MAX_DATA_SIZE;
 
   static {
     int minDataSize = 0;
     for (PackedInts.Format format : PackedInts.Format.values()) {
@@ -61,14 +66,26 @@ final class ForUtil {
         minDataSize = Math.max(minDataSize, iterations * decoder.valueCount());
       }
     }
-    MIN_DATA_SIZE = minDataSize;
+    MAX_DATA_SIZE = minDataSize;
   }
 
+  /**
+   * Compute the number of iterations required to decode BLOCK_SIZE
+   * values with the provided {@link Decoder}.
+   */
   private static int computeIterations(PackedInts.Decoder decoder) {
     return (int) Math.ceil((float) BLOCK_SIZE / decoder.valueCount());
   }
 
-  private final PackedInts.FormatAndBits[] formats;
+  /**
+   * Compute the number of bytes required to encode a block of values that require
+   * bitsPerValue bits per value with format format.
+   */
+  private static int encodedSize(PackedInts.Format format, int bitsPerValue) {
+    return format.nblocks(bitsPerValue, BLOCK_SIZE) << 3;
+  }
+
+  private final int[] encodedSizes;
   private final PackedInts.Encoder[] encoders;
   private final PackedInts.Decoder[] decoders;
   private final int[] iterations;
 
@@ -77,7 +94,7 @@ final class ForUtil {
    * Create a new {@link ForUtil} instance and save state into out.
    */
   ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
-    formats = new PackedInts.FormatAndBits[33];
+    encodedSizes = new int[33];
     encoders = new PackedInts.Encoder[33];
     decoders = new PackedInts.Decoder[33];
     iterations = new int[33];
@@ -87,7 +104,7 @@ final class ForUtil {
           BLOCK_SIZE, bpv, acceptableOverheadRatio);
       assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
       assert formatAndBits.bitsPerValue <= 32;
-      formats[bpv] = formatAndBits;
+      encodedSizes[bpv] = encodedSize(formatAndBits.format, formatAndBits.bitsPerValue);
       encoders[bpv] = PackedInts.getEncoder(
           formatAndBits.format, PACKED_INTS_VERSION, formatAndBits.bitsPerValue);
       decoders[bpv] = PackedInts.getDecoder(
@@ -102,7 +119,7 @@ final class ForUtil {
    * Restore a {@link ForUtil} from a {@link DataInput}.
    */
   ForUtil(DataInput in) throws IOException {
-    formats = new PackedInts.FormatAndBits[33];
+    encodedSizes = new int[33];
     encoders = new PackedInts.Encoder[33];
     decoders = new PackedInts.Decoder[33];
     iterations = new int[33];
@@ -114,7 +131,7 @@ final class ForUtil {
 
       final PackedInts.Format format = PackedInts.Format.byId(formatId);
       assert format.isSupported(bitsPerValue);
-      formats[bpv] = new PackedInts.FormatAndBits(format, bitsPerValue);
+      encodedSizes[bpv] = encodedSize(format, bitsPerValue);
       encoders[bpv] = PackedInts.getEncoder(
           format, PACKED_INTS_VERSION, bitsPerValue);
       decoders[bpv] = PackedInts.getDecoder(
@@ -123,19 +140,6 @@ final class ForUtil {
     }
   }
 
-  /**
-   * Compute the minimum size of the buffer that holds values. This method exists
-   * because {@link Decoder}s cannot decode less than a given amount of blocks
-   * at a time.
-   */
-  int getMinRequiredBufferSize() {
-    int minSize = 0;
-    for (int bpv = 1; bpv <= 32; ++bpv) {
-      minSize = Math.max(minSize, iterations[bpv] * decoders[bpv].valueCount());
-    }
-    return minSize;
-  }
-
   /**
    * Write a block of data (For format).
    *
@@ -156,7 +160,7 @@ final class ForUtil {
     final PackedInts.Encoder encoder = encoders[numBits];
     final int iters = iterations[numBits];
     assert iters * encoder.valueCount() >= BLOCK_SIZE;
-    final int encodedSize = encodedSize(numBits);
+    final int encodedSize = encodedSizes[numBits];
     assert (iters * encoder.blockCount()) << 3 >= encodedSize;
 
     out.writeVInt(numBits);
@@ -183,7 +187,7 @@ final class ForUtil {
       return;
     }
 
-    final int encodedSize = encodedSize(numBits);
+    final int encodedSize = encodedSizes[numBits];
     in.readBytes(encoded, 0, encodedSize);
 
     final PackedInts.Decoder decoder = decoders[numBits];
@@ -206,32 +210,10 @@ final class ForUtil {
       return;
     }
     assert numBits > 0 && numBits <= 32 : numBits;
-    final int encodedSize = encodedSize(numBits);
+    final int encodedSize = encodedSizes[numBits];
     in.seek(in.getFilePointer() + encodedSize);
   }
 
-  /**
-   * Read values that have been written using variable-length encoding instead of bit-packing.
-   */
-  static void readVIntBlock(IndexInput docIn, int[] docBuffer,
-      int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
-    if (indexHasFreq) {
-      for(int i=0;i<num;i++) {
-        final int code = docIn.readVInt();
-        docBuffer[i] = code >>> 1;
-        if ((code & 1) != 0) {
-          freqBuffer[i] = 1;
-        } else {
-          freqBuffer[i] = docIn.readVInt();
-        }
-      }
-    } else {
-      for(int i=0;i<num;i++) {
-        docBuffer[i] = docIn.readVInt();
-      }
-    }
-  }
-
-  /**
-   * Compute the number of bytes required to encode a block of values that require
-   * bitsPerValue bits per value.
-   */
-  private int encodedSize(int bitsPerValue) {
-    final FormatAndBits formatAndBits = formats[bitsPerValue];
-    return formatAndBits.format.nblocks(formatAndBits.bitsPerValue, BLOCK_SIZE) << 3;
-  }
-
 }
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java
index 158328fb024..983b8df7b2f 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java
@@ -18,8 +18,8 @@ package org.apache.lucene.codecs.blockpacked;
  */
 
 import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -39,7 +39,7 @@ public class TestForUtil extends LuceneTestCase {
   public void testEncodeDecode() throws IOException {
     final int iterations = RandomInts.randomIntBetween(random(), 1, 1000);
     final float acceptableOverheadRatio = random().nextFloat();
-    final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MIN_DATA_SIZE];
+    final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
     for (int i = 0; i < iterations; ++i) {
       final int bpv = random().nextInt(32);
       if (bpv == 0) {
@@ -66,7 +66,7 @@ public class TestForUtil extends LuceneTestCase {
       for (int i = 0; i < iterations; ++i) {
         forUtil.writeBlock(
             Arrays.copyOfRange(values, i * BLOCK_SIZE, values.length),
-            new byte[MIN_ENCODED_SIZE], out);
+            new byte[MAX_ENCODED_SIZE], out);
       }
       endPointer = out.getFilePointer();
       out.close();
@@ -81,8 +81,8 @@ public class TestForUtil extends LuceneTestCase {
           forUtil.skipBlock(in);
           continue;
         }
-        final int[] restored = new int[MIN_DATA_SIZE];
-        forUtil.readBlock(in, new byte[MIN_ENCODED_SIZE], restored);
+        final int[] restored = new int[MAX_DATA_SIZE];
+        forUtil.readBlock(in, new byte[MAX_ENCODED_SIZE], restored);
         assertArrayEquals(Arrays.copyOfRange(values, i * BLOCK_SIZE, (i + 1) * BLOCK_SIZE),
             Arrays.copyOf(restored, BLOCK_SIZE));
       }
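
Note on the core change above: ForUtil now precomputes, for every bits-per-value, the number of bytes an encoded block occupies and stores it in an int[] (encodedSizes), so writeBlock, readBlock and skipBlock index into that table instead of recomputing the size from a cached FormatAndBits on every call. The snippet below is a minimal, self-contained sketch of that lookup-table pattern only; the class name, the BLOCK_SIZE value of 128 and the ceil(BLOCK_SIZE * bpv / 8) formula are illustrative stand-ins, not the PackedInts.Format.nblocks() computation the patch actually uses.

// Standalone sketch (not the patched ForUtil): cache the encoded block size per
// bits-per-value in a lookup table instead of recomputing it on every block.
public class EncodedSizeCacheSketch {

  private static final int BLOCK_SIZE = 128;      // assumed block length, for illustration
  private final int[] encodedSizes = new int[33]; // index = bits per value (1..32)

  public EncodedSizeCacheSketch() {
    for (int bpv = 1; bpv <= 32; ++bpv) {
      // Stand-in formula: a packed block of BLOCK_SIZE values at bpv bits each
      // occupies ceil(BLOCK_SIZE * bpv / 8) bytes; the real code asks the format.
      encodedSizes[bpv] = (BLOCK_SIZE * bpv + 7) >>> 3;
    }
  }

  /** Look up the cached size instead of recomputing it per block. */
  public int encodedSize(int bitsPerValue) {
    return encodedSizes[bitsPerValue];
  }

  public static void main(String[] args) {
    EncodedSizeCacheSketch cache = new EncodedSizeCacheSketch();
    System.out.println(cache.encodedSize(5)); // 80 bytes for 128 values at 5 bits each
  }
}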