mirror of https://github.com/apache/lucene.git
LUCENE-4473: encode low-freq terms offsets more efficiently in blockPF
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1396867 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c2c2717c3e
commit
295e46da1f
|
@ -75,6 +75,9 @@ Optimizations
|
|||
failures in TestWeakIdentityMap disappear, too.
|
||||
(Uwe Schindler, Mike McCandless, Robert Muir)
|
||||
|
||||
* LUCENE-4473: BlockPostingsFormat encodes offsets more efficiently
|
||||
for low frequency terms (< 128 occurrences). (Robert Muir)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-4451: Memory leak per unique thread caused by
|
||||
|
|
|
@ -306,10 +306,10 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* PayloadLength is stored at the current position, then it indicates the length
|
||||
* of this payload. If PayloadLength is not stored, then this payload has the same
|
||||
* length as the payload at the previous position.</li>
|
||||
* <li>OffsetDelta is the difference between this position's startOffset from the
|
||||
* <li>OffsetDelta/2 is the difference between this position's startOffset from the
|
||||
* previous occurrence (or zero, if this is the first occurrence in this document).
|
||||
* OffsetLength follows, encoding the difference between endOffset and startOffset.
|
||||
* Offset data is only written for
|
||||
* If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the
|
||||
* previous occurrence and an OffsetLength follows. Offset data is only written for
|
||||
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.</li>
|
||||
* </ul>
|
||||
* </dd>
|
||||
|
|
|
@ -729,8 +729,10 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
|||
posDeltaBuffer[i] = code;
|
||||
}
|
||||
if (indexHasOffsets) {
|
||||
posIn.readVInt();
|
||||
posIn.readVInt();
|
||||
if ((posIn.readVInt() & 1) != 0) {
|
||||
// offset length changed
|
||||
posIn.readVInt();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -1149,6 +1151,7 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
|||
// }
|
||||
final int count = posIn.readVInt();
|
||||
int payloadLength = 0;
|
||||
int offsetLength = 0;
|
||||
payloadByteUpto = 0;
|
||||
for(int i=0;i<count;i++) {
|
||||
int code = posIn.readVInt();
|
||||
|
@ -1177,8 +1180,12 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
|||
// if (DEBUG) {
|
||||
// System.out.println(" i=" + i + " read offsets from posIn.fp=" + posIn.getFilePointer());
|
||||
// }
|
||||
offsetStartDeltaBuffer[i] = posIn.readVInt();
|
||||
offsetLengthBuffer[i] = posIn.readVInt();
|
||||
int deltaCode = posIn.readVInt();
|
||||
if ((deltaCode & 1) != 0) {
|
||||
offsetLength = posIn.readVInt();
|
||||
}
|
||||
offsetStartDeltaBuffer[i] = deltaCode >>> 1;
|
||||
offsetLengthBuffer[i] = offsetLength;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" startOffDelta=" + offsetStartDeltaBuffer[i] + " offsetLen=" + offsetLengthBuffer[i]);
|
||||
// }
|
||||
|
|
|
@ -424,7 +424,8 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
|||
// majority)
|
||||
|
||||
// vInt encode the remaining positions/payloads/offsets:
|
||||
int lastPayloadLength = -1;
|
||||
int lastPayloadLength = -1; // force first payload length to be written
|
||||
int lastOffsetLength = -1; // force first offset length to be written
|
||||
int payloadBytesReadUpto = 0;
|
||||
for(int i=0;i<posBufferUpto;i++) {
|
||||
final int posDelta = posDeltaBuffer[i];
|
||||
|
@ -457,8 +458,15 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
|||
// if (DEBUG) {
|
||||
// System.out.println(" write offset @ pos.fp=" + posOut.getFilePointer());
|
||||
// }
|
||||
posOut.writeVInt(offsetStartDeltaBuffer[i]);
|
||||
posOut.writeVInt(offsetLengthBuffer[i]);
|
||||
int delta = offsetStartDeltaBuffer[i];
|
||||
int length = offsetLengthBuffer[i];
|
||||
if (length == lastOffsetLength) {
|
||||
posOut.writeVInt(delta << 1);
|
||||
} else {
|
||||
posOut.writeVInt(delta << 1 | 1);
|
||||
posOut.writeVInt(length);
|
||||
lastOffsetLength = length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue