LUCENE-3892: fix constant names, cache encoded sizes instead of formats and move readVIntBlock to the postings reader.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1371184 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2012-08-09 12:52:24 +00:00
parent 0be8bfb4fc
commit 90918ad2d6
4 changed files with 86 additions and 91 deletions

View File

@ -18,8 +18,8 @@ package org.apache.lucene.codecs.blockpacked;
*/
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
import java.io.IOException;
import java.util.Arrays;
@ -122,6 +122,28 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
}
}
/**
* Read values that have been written using variable-length encoding instead of bit-packing.
*/
private static void readVIntBlock(IndexInput docIn, int[] docBuffer,
int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
if (indexHasFreq) {
for(int i=0;i<num;i++) {
final int code = docIn.readVInt();
docBuffer[i] = code >>> 1;
if ((code & 1) != 0) {
freqBuffer[i] = 1;
} else {
freqBuffer[i] = docIn.readVInt();
}
}
} else {
for(int i=0;i<num;i++) {
docBuffer[i] = docIn.readVInt();
}
}
}
// Must keep final because we do non-standard clone
private final static class IntBlockTermState extends BlockTermState {
long docStartFP;
@ -298,8 +320,8 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
final class BlockDocsEnum extends DocsEnum {
private final byte[] encoded;
private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
private final int[] freqBuffer = new int[MIN_DATA_SIZE];
private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
private final int[] freqBuffer = new int[MAX_DATA_SIZE];
private int docBufferUpto;
@ -341,7 +363,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
encoded = new byte[MIN_ENCODED_SIZE];
encoded = new byte[MAX_ENCODED_SIZE];
}
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@ -404,7 +426,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
if (DEBUG) {
System.out.println(" fill last vInt block from fp=" + docIn.getFilePointer());
}
ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
}
docBufferUpto = 0;
}
@ -548,9 +570,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
private final byte[] encoded;
private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
private final int[] freqBuffer = new int[MIN_DATA_SIZE];
private final int[] posDeltaBuffer = new int[MIN_DATA_SIZE];
private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
private final int[] freqBuffer = new int[MAX_DATA_SIZE];
private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
private int docBufferUpto;
private int posBufferUpto;
@ -609,7 +631,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
this.startDocIn = BlockPackedPostingsReader.this.docIn;
this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPackedPostingsReader.this.posIn.clone();
encoded = new byte[MIN_ENCODED_SIZE];
encoded = new byte[MAX_ENCODED_SIZE];
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
}
@ -678,7 +700,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
if (DEBUG) {
System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
}
ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
}
docBufferUpto = 0;
}
@ -952,9 +974,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
private final byte[] encoded;
private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
private final int[] freqBuffer = new int[MIN_DATA_SIZE];
private final int[] posDeltaBuffer = new int[MIN_DATA_SIZE];
private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
private final int[] freqBuffer = new int[MAX_DATA_SIZE];
private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
private final int[] payloadLengthBuffer;
private final int[] offsetStartDeltaBuffer;
@ -1032,11 +1054,11 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPackedPostingsReader.this.posIn.clone();
this.payIn = (IndexInput) BlockPackedPostingsReader.this.payIn.clone();
encoded = new byte[MIN_ENCODED_SIZE];
encoded = new byte[MAX_ENCODED_SIZE];
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (indexHasOffsets) {
offsetStartDeltaBuffer = new int[MIN_DATA_SIZE];
offsetLengthBuffer = new int[MIN_DATA_SIZE];
offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
offsetLengthBuffer = new int[MAX_DATA_SIZE];
} else {
offsetStartDeltaBuffer = null;
offsetLengthBuffer = null;
@ -1046,7 +1068,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
indexHasPayloads = fieldInfo.hasPayloads();
if (indexHasPayloads) {
payloadLengthBuffer = new int[MIN_DATA_SIZE];
payloadLengthBuffer = new int[MAX_DATA_SIZE];
payloadBytes = new byte[128];
payload = new BytesRef();
} else {
@ -1120,7 +1142,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
if (DEBUG) {
System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
}
ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
}
docBufferUpto = 0;
}

View File

@ -19,8 +19,8 @@ package org.apache.lucene.codecs.blockpacked;
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsReader.DEBUG;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
import java.io.IOException;
import java.util.ArrayList;
@ -125,22 +125,22 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
forUtil = new ForUtil(acceptableOverheadRatio, docOut);
if (state.fieldInfos.hasProx()) {
posDeltaBuffer = new int[MIN_DATA_SIZE];
posDeltaBuffer = new int[MAX_DATA_SIZE];
posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPackedPostingsFormat.POS_EXTENSION),
state.context);
CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
if (state.fieldInfos.hasPayloads()) {
payloadBytes = new byte[128];
payloadLengthBuffer = new int[MIN_DATA_SIZE];
payloadLengthBuffer = new int[MAX_DATA_SIZE];
} else {
payloadBytes = null;
payloadLengthBuffer = null;
}
if (state.fieldInfos.hasOffsets()) {
offsetStartDeltaBuffer = new int[MIN_DATA_SIZE];
offsetLengthBuffer = new int[MIN_DATA_SIZE];
offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
offsetLengthBuffer = new int[MAX_DATA_SIZE];
} else {
offsetStartDeltaBuffer = null;
offsetLengthBuffer = null;
@ -167,8 +167,8 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
}
}
docDeltaBuffer = new int[MIN_DATA_SIZE];
freqBuffer = new int[MIN_DATA_SIZE];
docDeltaBuffer = new int[MAX_DATA_SIZE];
freqBuffer = new int[MAX_DATA_SIZE];
// nocommit should we try skipping every 2/4 blocks...?
skipWriter = new BlockPackedSkipWriter(maxSkipLevels,
@ -178,7 +178,7 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
posOut,
payOut);
encoded = new byte[MIN_ENCODED_SIZE];
encoded = new byte[MAX_ENCODED_SIZE];
}
public BlockPackedPostingsWriter(SegmentWriteState state) throws IOException {

View File

@ -26,6 +26,7 @@ import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedInts.Decoder;
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
/**
@ -41,14 +42,18 @@ final class ForUtil {
private static final int PACKED_INTS_VERSION = 0; // nocommit: encode in the stream?
/**
* Minimum length of the buffer that holds encoded bytes.
* Upper limit of the number of bytes that might be required to stored
* <code>BLOCK_SIZE</code> encoded values.
*/
static final int MIN_ENCODED_SIZE = BLOCK_SIZE * 4;
static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4;
/**
* Minimum length of the buffer that holds data.
* Upper limit of the number of values that might be decoded in a single call to
* {@link #readBlock(IndexInput, byte[], int[])}. Although values after
* <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers
* whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s.
*/
static final int MIN_DATA_SIZE;
static final int MAX_DATA_SIZE;
static {
int minDataSize = 0;
for (PackedInts.Format format : PackedInts.Format.values()) {
@ -61,14 +66,26 @@ final class ForUtil {
minDataSize = Math.max(minDataSize, iterations * decoder.valueCount());
}
}
MIN_DATA_SIZE = minDataSize;
MAX_DATA_SIZE = minDataSize;
}
/**
* Compute the number of iterations required to decode <code>BLOCK_SIZE</code>
* values with the provided {@link Decoder}.
*/
private static int computeIterations(PackedInts.Decoder decoder) {
return (int) Math.ceil((float) BLOCK_SIZE / decoder.valueCount());
}
private final PackedInts.FormatAndBits[] formats;
/**
* Compute the number of bytes required to encode a block of values that require
* <code>bitsPerValue</code> bits per value with format <code>format</code>.
*/
private static int encodedSize(PackedInts.Format format, int bitsPerValue) {
return format.nblocks(bitsPerValue, BLOCK_SIZE) << 3;
}
private final int[] encodedSizes;
private final PackedInts.Encoder[] encoders;
private final PackedInts.Decoder[] decoders;
private final int[] iterations;
@ -77,7 +94,7 @@ final class ForUtil {
* Create a new {@link ForUtil} instance and save state into <code>out</code>.
*/
ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
formats = new PackedInts.FormatAndBits[33];
encodedSizes = new int[33];
encoders = new PackedInts.Encoder[33];
decoders = new PackedInts.Decoder[33];
iterations = new int[33];
@ -87,7 +104,7 @@ final class ForUtil {
BLOCK_SIZE, bpv, acceptableOverheadRatio);
assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
assert formatAndBits.bitsPerValue <= 32;
formats[bpv] = formatAndBits;
encodedSizes[bpv] = encodedSize(formatAndBits.format, formatAndBits.bitsPerValue);
encoders[bpv] = PackedInts.getEncoder(
formatAndBits.format, PACKED_INTS_VERSION, formatAndBits.bitsPerValue);
decoders[bpv] = PackedInts.getDecoder(
@ -102,7 +119,7 @@ final class ForUtil {
* Restore a {@link ForUtil} from a {@link DataInput}.
*/
ForUtil(DataInput in) throws IOException {
formats = new PackedInts.FormatAndBits[33];
encodedSizes = new int[33];
encoders = new PackedInts.Encoder[33];
decoders = new PackedInts.Decoder[33];
iterations = new int[33];
@ -114,7 +131,7 @@ final class ForUtil {
final PackedInts.Format format = PackedInts.Format.byId(formatId);
assert format.isSupported(bitsPerValue);
formats[bpv] = new PackedInts.FormatAndBits(format, bitsPerValue);
encodedSizes[bpv] = encodedSize(format, bitsPerValue);
encoders[bpv] = PackedInts.getEncoder(
format, PACKED_INTS_VERSION, bitsPerValue);
decoders[bpv] = PackedInts.getDecoder(
@ -123,19 +140,6 @@ final class ForUtil {
}
}
/**
* Compute the minimum size of the buffer that holds values. This method exists
* because {@link Decoder}s cannot decode less than a given amount of blocks
* at a time.
*/
int getMinRequiredBufferSize() {
int minSize = 0;
for (int bpv = 1; bpv <= 32; ++bpv) {
minSize = Math.max(minSize, iterations[bpv] * decoders[bpv].valueCount());
}
return minSize;
}
/**
* Write a block of data (<code>For</code> format).
*
@ -156,7 +160,7 @@ final class ForUtil {
final PackedInts.Encoder encoder = encoders[numBits];
final int iters = iterations[numBits];
assert iters * encoder.valueCount() >= BLOCK_SIZE;
final int encodedSize = encodedSize(numBits);
final int encodedSize = encodedSizes[numBits];
assert (iters * encoder.blockCount()) << 3 >= encodedSize;
out.writeVInt(numBits);
@ -183,7 +187,7 @@ final class ForUtil {
return;
}
final int encodedSize = encodedSize(numBits);
final int encodedSize = encodedSizes[numBits];
in.readBytes(encoded, 0, encodedSize);
final PackedInts.Decoder decoder = decoders[numBits];
@ -206,32 +210,10 @@ final class ForUtil {
return;
}
assert numBits > 0 && numBits <= 32 : numBits;
final int encodedSize = encodedSize(numBits);
final int encodedSize = encodedSizes[numBits];
in.seek(in.getFilePointer() + encodedSize);
}
/**
* Read values that have been written using variable-length encoding instead of bit-packing.
*/
static void readVIntBlock(IndexInput docIn, int[] docBuffer,
int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
if (indexHasFreq) {
for(int i=0;i<num;i++) {
final int code = docIn.readVInt();
docBuffer[i] = code >>> 1;
if ((code & 1) != 0) {
freqBuffer[i] = 1;
} else {
freqBuffer[i] = docIn.readVInt();
}
}
} else {
for(int i=0;i<num;i++) {
docBuffer[i] = docIn.readVInt();
}
}
}
// nocommit: we must have a util function for this, hmm?
private static boolean isAllEqual(final int[] data) {
final long v = data[0];
@ -255,13 +237,4 @@ final class ForUtil {
return PackedInts.bitsRequired(or);
}
/**
* Compute the number of bytes required to encode a block of values that require
* <code>bitsPerValue</code> bits per value.
*/
private int encodedSize(int bitsPerValue) {
final FormatAndBits formatAndBits = formats[bitsPerValue];
return formatAndBits.format.nblocks(formatAndBits.bitsPerValue, BLOCK_SIZE) << 3;
}
}

View File

@ -18,8 +18,8 @@ package org.apache.lucene.codecs.blockpacked;
*/
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
import java.io.IOException;
import java.util.Arrays;
@ -39,7 +39,7 @@ public class TestForUtil extends LuceneTestCase {
public void testEncodeDecode() throws IOException {
final int iterations = RandomInts.randomIntBetween(random(), 1, 1000);
final float acceptableOverheadRatio = random().nextFloat();
final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MIN_DATA_SIZE];
final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
for (int i = 0; i < iterations; ++i) {
final int bpv = random().nextInt(32);
if (bpv == 0) {
@ -66,7 +66,7 @@ public class TestForUtil extends LuceneTestCase {
for (int i = 0; i < iterations; ++i) {
forUtil.writeBlock(
Arrays.copyOfRange(values, i * BLOCK_SIZE, values.length),
new byte[MIN_ENCODED_SIZE], out);
new byte[MAX_ENCODED_SIZE], out);
}
endPointer = out.getFilePointer();
out.close();
@ -81,8 +81,8 @@ public class TestForUtil extends LuceneTestCase {
forUtil.skipBlock(in);
continue;
}
final int[] restored = new int[MIN_DATA_SIZE];
forUtil.readBlock(in, new byte[MIN_ENCODED_SIZE], restored);
final int[] restored = new int[MAX_DATA_SIZE];
forUtil.readBlock(in, new byte[MAX_ENCODED_SIZE], restored);
assertArrayEquals(Arrays.copyOfRange(values, i * BLOCK_SIZE, (i + 1) * BLOCK_SIZE),
Arrays.copyOf(restored, BLOCK_SIZE));
}