mirror of https://github.com/apache/lucene.git

LUCENE-3892: fix constant names, cache encoded sizes instead of formats and move readVIntBlock to the postings reader.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1371184 13f79535-47bb-0310-9956-ffa450edef68

commit 90918ad2d6
parent 0be8bfb4fc
BlockPackedPostingsReader.java

@@ -18,8 +18,8 @@ package org.apache.lucene.codecs.blockpacked;
  */
 
 import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -122,6 +122,28 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
     }
   }
 
+  /**
+   * Read values that have been written using variable-length encoding instead of bit-packing.
+   */
+  private static void readVIntBlock(IndexInput docIn, int[] docBuffer,
+      int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
+    if (indexHasFreq) {
+      for(int i=0;i<num;i++) {
+        final int code = docIn.readVInt();
+        docBuffer[i] = code >>> 1;
+        if ((code & 1) != 0) {
+          freqBuffer[i] = 1;
+        } else {
+          freqBuffer[i] = docIn.readVInt();
+        }
+      }
+    } else {
+      for(int i=0;i<num;i++) {
+        docBuffer[i] = docIn.readVInt();
+      }
+    }
+  }
+
   // Must keep final because we do non-standard clone
   private final static class IntBlockTermState extends BlockTermState {
     long docStartFP;
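readVIntBlock, moved here from ForUtil, decodes the vInt-encoded tail of a postings list: the documents left over after the last full bit-packed block. When frequencies are indexed, each doc delta is shifted left by one bit and the low bit marks the common freq == 1 case so it costs no extra byte. Below is a minimal sketch of the matching encode side, assuming the writer follows exactly the convention this decoder expects; it is an illustration, not part of the patch.

// Illustration only (not part of this commit): the write side that readVIntBlock
// reverses, using org.apache.lucene.store.DataOutput. docDeltas[i] holds the
// delta-coded doc id and freqs[i] its frequency.
static void writeVIntBlock(DataOutput docOut, int[] docDeltas, int[] freqs,
    int num, boolean indexHasFreq) throws IOException {
  for (int i = 0; i < num; i++) {
    if (indexHasFreq) {
      if (freqs[i] == 1) {
        docOut.writeVInt((docDeltas[i] << 1) | 1); // low bit set: freq is implicitly 1
      } else {
        docOut.writeVInt(docDeltas[i] << 1);       // low bit clear: an explicit freq follows
        docOut.writeVInt(freqs[i]);
      }
    } else {
      docOut.writeVInt(docDeltas[i]);              // no freqs indexed: plain vInt deltas
    }
  }
}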
@@ -298,8 +320,8 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
   final class BlockDocsEnum extends DocsEnum {
     private final byte[] encoded;
 
-    private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
-    private final int[] freqBuffer = new int[MIN_DATA_SIZE];
+    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
 
     private int docBufferUpto;
 
@@ -341,7 +363,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
       indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       indexHasPayloads = fieldInfo.hasPayloads();
-      encoded = new byte[MIN_ENCODED_SIZE];
+      encoded = new byte[MAX_ENCODED_SIZE];
     }
 
     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -404,7 +426,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
         if (DEBUG) {
           System.out.println(" fill last vInt block from fp=" + docIn.getFilePointer());
         }
-        ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
+        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
       }
       docBufferUpto = 0;
     }
@@ -548,9 +570,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
 
     private final byte[] encoded;
 
-    private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
-    private final int[] freqBuffer = new int[MIN_DATA_SIZE];
-    private final int[] posDeltaBuffer = new int[MIN_DATA_SIZE];
+    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
+    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
 
     private int docBufferUpto;
     private int posBufferUpto;
@@ -609,7 +631,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
       this.startDocIn = BlockPackedPostingsReader.this.docIn;
       this.docIn = (IndexInput) startDocIn.clone();
       this.posIn = (IndexInput) BlockPackedPostingsReader.this.posIn.clone();
-      encoded = new byte[MIN_ENCODED_SIZE];
+      encoded = new byte[MAX_ENCODED_SIZE];
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       indexHasPayloads = fieldInfo.hasPayloads();
     }
@@ -678,7 +700,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
         if (DEBUG) {
           System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
         }
-        ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
+        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
       }
       docBufferUpto = 0;
     }
@@ -952,9 +974,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
 
     private final byte[] encoded;
 
-    private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
-    private final int[] freqBuffer = new int[MIN_DATA_SIZE];
-    private final int[] posDeltaBuffer = new int[MIN_DATA_SIZE];
+    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
+    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
 
     private final int[] payloadLengthBuffer;
     private final int[] offsetStartDeltaBuffer;
@@ -1032,11 +1054,11 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
       this.docIn = (IndexInput) startDocIn.clone();
       this.posIn = (IndexInput) BlockPackedPostingsReader.this.posIn.clone();
       this.payIn = (IndexInput) BlockPackedPostingsReader.this.payIn.clone();
-      encoded = new byte[MIN_ENCODED_SIZE];
+      encoded = new byte[MAX_ENCODED_SIZE];
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       if (indexHasOffsets) {
-        offsetStartDeltaBuffer = new int[MIN_DATA_SIZE];
-        offsetLengthBuffer = new int[MIN_DATA_SIZE];
+        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
+        offsetLengthBuffer = new int[MAX_DATA_SIZE];
       } else {
         offsetStartDeltaBuffer = null;
         offsetLengthBuffer = null;
@@ -1046,7 +1068,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
 
       indexHasPayloads = fieldInfo.hasPayloads();
       if (indexHasPayloads) {
-        payloadLengthBuffer = new int[MIN_DATA_SIZE];
+        payloadLengthBuffer = new int[MAX_DATA_SIZE];
         payloadBytes = new byte[128];
         payload = new BytesRef();
       } else {
@@ -1120,7 +1142,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
         if (DEBUG) {
           System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
         }
-        ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
+        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
       }
       docBufferUpto = 0;
     }
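For context on the "fill last vInt block" branches patched in this file: only whole blocks of BLOCK_SIZE documents are bit-packed, and the remainder of a term's postings is read with readVIntBlock. A small worked example, assuming BLOCK_SIZE is 128 on this branch (illustration only):

// Illustration only (not part of this commit), assuming BLOCK_SIZE == 128.
int docFreq = 300;                 // hypothetical term with 300 postings
int packedBlocks = docFreq / 128;  // 2 full bit-packed blocks cover 256 docs
int left = docFreq % 128;          // the remaining 44 docs are decoded by readVIntBlock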
BlockPackedPostingsWriter.java

@@ -19,8 +19,8 @@ package org.apache.lucene.codecs.blockpacked;
 
 import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
 import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsReader.DEBUG;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -125,22 +125,22 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
     CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
     forUtil = new ForUtil(acceptableOverheadRatio, docOut);
     if (state.fieldInfos.hasProx()) {
-      posDeltaBuffer = new int[MIN_DATA_SIZE];
+      posDeltaBuffer = new int[MAX_DATA_SIZE];
       posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPackedPostingsFormat.POS_EXTENSION),
                                             state.context);
       CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
 
       if (state.fieldInfos.hasPayloads()) {
         payloadBytes = new byte[128];
-        payloadLengthBuffer = new int[MIN_DATA_SIZE];
+        payloadLengthBuffer = new int[MAX_DATA_SIZE];
       } else {
         payloadBytes = null;
         payloadLengthBuffer = null;
       }
 
       if (state.fieldInfos.hasOffsets()) {
-        offsetStartDeltaBuffer = new int[MIN_DATA_SIZE];
-        offsetLengthBuffer = new int[MIN_DATA_SIZE];
+        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
+        offsetLengthBuffer = new int[MAX_DATA_SIZE];
       } else {
         offsetStartDeltaBuffer = null;
         offsetLengthBuffer = null;
@@ -167,8 +167,8 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
       }
     }
 
-    docDeltaBuffer = new int[MIN_DATA_SIZE];
-    freqBuffer = new int[MIN_DATA_SIZE];
+    docDeltaBuffer = new int[MAX_DATA_SIZE];
+    freqBuffer = new int[MAX_DATA_SIZE];
 
     // nocommit should we try skipping every 2/4 blocks...?
     skipWriter = new BlockPackedSkipWriter(maxSkipLevels,
@@ -178,7 +178,7 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
                                            posOut,
                                            payOut);
 
-    encoded = new byte[MIN_ENCODED_SIZE];
+    encoded = new byte[MAX_ENCODED_SIZE];
   }
 
   public BlockPackedPostingsWriter(SegmentWriteState state) throws IOException {
ForUtil.java

@@ -26,6 +26,7 @@ import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedInts.Decoder;
 import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
 
 /**
@@ -41,14 +42,18 @@ final class ForUtil {
   private static final int PACKED_INTS_VERSION = 0; // nocommit: encode in the stream?
 
   /**
-   * Minimum length of the buffer that holds encoded bytes.
+   * Upper limit of the number of bytes that might be required to stored
+   * <code>BLOCK_SIZE</code> encoded values.
    */
-  static final int MIN_ENCODED_SIZE = BLOCK_SIZE * 4;
+  static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4;
 
   /**
-   * Minimum length of the buffer that holds data.
+   * Upper limit of the number of values that might be decoded in a single call to
+   * {@link #readBlock(IndexInput, byte[], int[])}. Although values after
+   * <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers
+   * whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s.
    */
-  static final int MIN_DATA_SIZE;
+  static final int MAX_DATA_SIZE;
   static {
     int minDataSize = 0;
     for (PackedInts.Format format : PackedInts.Format.values()) {
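The rename to MAX_DATA_SIZE matches what the new javadoc says: a decoder always emits a whole number of its own iterations, so a single readBlock call may write past BLOCK_SIZE entries, and every buffer it fills must be at least MAX_DATA_SIZE long. A sketch of that sizing rule, assuming BLOCK_SIZE is 128 and using a hypothetical decoder valueCount of 48 (illustration only):

// Illustration only (not part of this commit).
static int requiredBufferSize(int blockSize, int valueCount) {
  int iterations = (int) Math.ceil((float) blockSize / valueCount); // same rule as computeIterations
  return iterations * valueCount;
}
// requiredBufferSize(128, 48) == 3 * 48 == 144: only the first 128 values are
// meaningful, but the buffer must hold all 144, hence int[MAX_DATA_SIZE]
// rather than int[BLOCK_SIZE] in the reader and writer above.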
@@ -61,14 +66,26 @@ final class ForUtil {
         minDataSize = Math.max(minDataSize, iterations * decoder.valueCount());
       }
     }
-    MIN_DATA_SIZE = minDataSize;
+    MAX_DATA_SIZE = minDataSize;
   }
 
+  /**
+   * Compute the number of iterations required to decode <code>BLOCK_SIZE</code>
+   * values with the provided {@link Decoder}.
+   */
   private static int computeIterations(PackedInts.Decoder decoder) {
     return (int) Math.ceil((float) BLOCK_SIZE / decoder.valueCount());
   }
 
-  private final PackedInts.FormatAndBits[] formats;
+  /**
+   * Compute the number of bytes required to encode a block of values that require
+   * <code>bitsPerValue</code> bits per value with format <code>format</code>.
+   */
+  private static int encodedSize(PackedInts.Format format, int bitsPerValue) {
+    return format.nblocks(bitsPerValue, BLOCK_SIZE) << 3;
+  }
+
+  private final int[] encodedSizes;
   private final PackedInts.Encoder[] encoders;
   private final PackedInts.Decoder[] decoders;
   private final int[] iterations;
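The new static encodedSize(format, bitsPerValue) converts a packed-ints block count into bytes: nblocks reports how many 64-bit blocks the format needs for BLOCK_SIZE values, and << 3 turns longs into bytes. A worked example, assuming BLOCK_SIZE is 128 and the contiguous PACKED layout (illustration only):

// Illustration only (not part of this commit): for the contiguous PACKED
// layout, nblocks is roughly ceil(blockSize * bitsPerValue / 64).
static int packedEncodedSize(int blockSize, int bitsPerValue) {
  int longs = (blockSize * bitsPerValue + 63) / 64; // 64-bit blocks needed
  return longs << 3;                                // 8 bytes per long
}
// packedEncodedSize(128, 5)  ==  80 bytes for a 5-bits-per-value block
// packedEncodedSize(128, 32) == 512 bytes == BLOCK_SIZE * 4 == MAX_ENCODED_SIZE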
@@ -77,7 +94,7 @@ final class ForUtil {
    * Create a new {@link ForUtil} instance and save state into <code>out</code>.
    */
   ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
-    formats = new PackedInts.FormatAndBits[33];
+    encodedSizes = new int[33];
     encoders = new PackedInts.Encoder[33];
     decoders = new PackedInts.Decoder[33];
     iterations = new int[33];
@@ -87,7 +104,7 @@ final class ForUtil {
           BLOCK_SIZE, bpv, acceptableOverheadRatio);
       assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
       assert formatAndBits.bitsPerValue <= 32;
-      formats[bpv] = formatAndBits;
+      encodedSizes[bpv] = encodedSize(formatAndBits.format, formatAndBits.bitsPerValue);
       encoders[bpv] = PackedInts.getEncoder(
           formatAndBits.format, PACKED_INTS_VERSION, formatAndBits.bitsPerValue);
       decoders[bpv] = PackedInts.getDecoder(
@@ -102,7 +119,7 @@ final class ForUtil {
    * Restore a {@link ForUtil} from a {@link DataInput}.
    */
   ForUtil(DataInput in) throws IOException {
-    formats = new PackedInts.FormatAndBits[33];
+    encodedSizes = new int[33];
     encoders = new PackedInts.Encoder[33];
     decoders = new PackedInts.Decoder[33];
     iterations = new int[33];
@@ -114,7 +131,7 @@ final class ForUtil {
 
       final PackedInts.Format format = PackedInts.Format.byId(formatId);
       assert format.isSupported(bitsPerValue);
-      formats[bpv] = new PackedInts.FormatAndBits(format, bitsPerValue);
+      encodedSizes[bpv] = encodedSize(format, bitsPerValue);
       encoders[bpv] = PackedInts.getEncoder(
           format, PACKED_INTS_VERSION, bitsPerValue);
       decoders[bpv] = PackedInts.getDecoder(
@@ -123,19 +140,6 @@ final class ForUtil {
     }
   }
 
-  /**
-   * Compute the minimum size of the buffer that holds values. This method exists
-   * because {@link Decoder}s cannot decode less than a given amount of blocks
-   * at a time.
-   */
-  int getMinRequiredBufferSize() {
-    int minSize = 0;
-    for (int bpv = 1; bpv <= 32; ++bpv) {
-      minSize = Math.max(minSize, iterations[bpv] * decoders[bpv].valueCount());
-    }
-    return minSize;
-  }
-
   /**
    * Write a block of data (<code>For</code> format).
    *
@@ -156,7 +160,7 @@ final class ForUtil {
     final PackedInts.Encoder encoder = encoders[numBits];
     final int iters = iterations[numBits];
     assert iters * encoder.valueCount() >= BLOCK_SIZE;
-    final int encodedSize = encodedSize(numBits);
+    final int encodedSize = encodedSizes[numBits];
     assert (iters * encoder.blockCount()) << 3 >= encodedSize;
 
     out.writeVInt(numBits);
@@ -183,7 +187,7 @@ final class ForUtil {
       return;
     }
 
-    final int encodedSize = encodedSize(numBits);
+    final int encodedSize = encodedSizes[numBits];
     in.readBytes(encoded, 0, encodedSize);
 
     final PackedInts.Decoder decoder = decoders[numBits];
@@ -206,32 +210,10 @@ final class ForUtil {
       return;
     }
     assert numBits > 0 && numBits <= 32 : numBits;
-    final int encodedSize = encodedSize(numBits);
+    final int encodedSize = encodedSizes[numBits];
     in.seek(in.getFilePointer() + encodedSize);
   }
 
-  /**
-   * Read values that have been written using variable-length encoding instead of bit-packing.
-   */
-  static void readVIntBlock(IndexInput docIn, int[] docBuffer,
-      int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
-    if (indexHasFreq) {
-      for(int i=0;i<num;i++) {
-        final int code = docIn.readVInt();
-        docBuffer[i] = code >>> 1;
-        if ((code & 1) != 0) {
-          freqBuffer[i] = 1;
-        } else {
-          freqBuffer[i] = docIn.readVInt();
-        }
-      }
-    } else {
-      for(int i=0;i<num;i++) {
-        docBuffer[i] = docIn.readVInt();
-      }
-    }
-  }
-
   // nocommit: we must have a util function for this, hmm?
   private static boolean isAllEqual(final int[] data) {
     final long v = data[0];
@@ -255,13 +237,4 @@ final class ForUtil {
     return PackedInts.bitsRequired(or);
   }
 
-  /**
-   * Compute the number of bytes required to encode a block of values that require
-   * <code>bitsPerValue</code> bits per value.
-   */
-  private int encodedSize(int bitsPerValue) {
-    final FormatAndBits formatAndBits = formats[bitsPerValue];
-    return formatAndBits.format.nblocks(formatAndBits.bitsPerValue, BLOCK_SIZE) << 3;
-  }
-
 }
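With the private per-call encodedSize(int) removed above, ForUtil no longer stores FormatAndBits at all: each constructor fills encodedSizes[] once, and writeBlock, readBlock and skipBlock simply index into it. A usage sketch of the skip path, assuming skipBlock consumes the per-block bit-width header written by writeBlock before seeking past the payload; the helper itself is hypothetical:

// Illustration only (not part of this commit): skipping n packed blocks without
// decoding them. The seek distance for each block comes from the cached
// encodedSizes[numBits] table inside skipBlock, so no per-block size computation
// (or FormatAndBits lookup) happens on this path.
static void skipBlocks(ForUtil forUtil, IndexInput in, int n) throws IOException {
  for (int i = 0; i < n; ++i) {
    forUtil.skipBlock(in);
  }
}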
TestForUtil.java

@@ -18,8 +18,8 @@ package org.apache.lucene.codecs.blockpacked;
  */
 
 import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -39,7 +39,7 @@ public class TestForUtil extends LuceneTestCase {
   public void testEncodeDecode() throws IOException {
     final int iterations = RandomInts.randomIntBetween(random(), 1, 1000);
     final float acceptableOverheadRatio = random().nextFloat();
-    final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MIN_DATA_SIZE];
+    final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
     for (int i = 0; i < iterations; ++i) {
       final int bpv = random().nextInt(32);
       if (bpv == 0) {
@@ -66,7 +66,7 @@ public class TestForUtil extends LuceneTestCase {
     for (int i = 0; i < iterations; ++i) {
       forUtil.writeBlock(
           Arrays.copyOfRange(values, i * BLOCK_SIZE, values.length),
-          new byte[MIN_ENCODED_SIZE], out);
+          new byte[MAX_ENCODED_SIZE], out);
     }
     endPointer = out.getFilePointer();
     out.close();
@@ -81,8 +81,8 @@ public class TestForUtil extends LuceneTestCase {
         forUtil.skipBlock(in);
         continue;
       }
-      final int[] restored = new int[MIN_DATA_SIZE];
-      forUtil.readBlock(in, new byte[MIN_ENCODED_SIZE], restored);
+      final int[] restored = new int[MAX_DATA_SIZE];
+      forUtil.readBlock(in, new byte[MAX_ENCODED_SIZE], restored);
       assertArrayEquals(Arrays.copyOfRange(values, i * BLOCK_SIZE, (i + 1) * BLOCK_SIZE),
           Arrays.copyOf(restored, BLOCK_SIZE));
     }