mirror of https://github.com/apache/lucene.git
LUCENE-4443: don't write unnecessary skipdata in BlockSkipWriter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391433 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
09720113f8
commit
014e6e0c94
|
@ -34,6 +34,12 @@ Bug Fixes
|
|||
parameter is reset to the default (1), even if set otherwise.
|
||||
(Gilad Barkai via Shai Erera)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets
|
||||
into the skipdata. You need to reindex any indexes created with
|
||||
this experimental codec. (Robert Muir)
|
||||
|
||||
======================= Lucene 4.0.0 =======================
|
||||
|
||||
Changes in backwards compatibility policy
|
||||
|
|
|
@ -202,7 +202,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* <sup>NumSkipLevels-1</sup>, SkipLevel>, SkipDatum?</li>
|
||||
* <li>SkipLevel --> <SkipDatum> <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li>
|
||||
* <li>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?,
|
||||
* OffsetStart?, PayFPSkip?>?, SkipChildLevelPointer?</li>
|
||||
* PayFPSkip?>?, SkipChildLevelPointer?</li>
|
||||
* <li>PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}</li>
|
||||
* <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayLength, OffsetStart, PayFPSkip
|
||||
* -->
|
||||
|
@ -250,7 +250,6 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of
|
||||
* current term's TermFreqs, and stored as a difference sequence.</li>
|
||||
* <li>PayLength indicates the length of last payload.</li>
|
||||
* <li>OffsetStart indicates the first value of last offset pair.</li>
|
||||
* </ul>
|
||||
* </dd>
|
||||
* </dl>
|
||||
|
|
|
@ -72,8 +72,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
|||
ioContext);
|
||||
CodecUtil.checkHeader(docIn,
|
||||
BlockPostingsWriter.DOC_CODEC,
|
||||
BlockPostingsWriter.VERSION_START,
|
||||
BlockPostingsWriter.VERSION_START);
|
||||
BlockPostingsWriter.VERSION_CURRENT,
|
||||
BlockPostingsWriter.VERSION_CURRENT);
|
||||
forUtil = new ForUtil(docIn);
|
||||
|
||||
if (fieldInfos.hasProx()) {
|
||||
|
@ -81,16 +81,16 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
|||
ioContext);
|
||||
CodecUtil.checkHeader(posIn,
|
||||
BlockPostingsWriter.POS_CODEC,
|
||||
BlockPostingsWriter.VERSION_START,
|
||||
BlockPostingsWriter.VERSION_START);
|
||||
BlockPostingsWriter.VERSION_CURRENT,
|
||||
BlockPostingsWriter.VERSION_CURRENT);
|
||||
|
||||
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
|
||||
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION),
|
||||
ioContext);
|
||||
CodecUtil.checkHeader(payIn,
|
||||
BlockPostingsWriter.PAY_CODEC,
|
||||
BlockPostingsWriter.VERSION_START,
|
||||
BlockPostingsWriter.VERSION_START);
|
||||
BlockPostingsWriter.VERSION_CURRENT,
|
||||
BlockPostingsWriter.VERSION_CURRENT);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -110,8 +110,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
|||
// Make sure we are talking to the matching postings writer
|
||||
CodecUtil.checkHeader(termsIn,
|
||||
BlockPostingsWriter.TERMS_CODEC,
|
||||
BlockPostingsWriter.VERSION_START,
|
||||
BlockPostingsWriter.VERSION_START);
|
||||
BlockPostingsWriter.VERSION_CURRENT,
|
||||
BlockPostingsWriter.VERSION_CURRENT);
|
||||
final int indexBlockSize = termsIn.readVInt();
|
||||
if (indexBlockSize != BLOCK_SIZE) {
|
||||
throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
|
||||
|
@ -1314,7 +1314,7 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
|||
posPendingFP = skipper.getPosPointer();
|
||||
payPendingFP = skipper.getPayPointer();
|
||||
posPendingCount = skipper.getPosBufferUpto();
|
||||
lastStartOffset = skipper.getStartOffset();
|
||||
lastStartOffset = 0; // new document
|
||||
payloadByteUpto = skipper.getPayloadByteUpto();
|
||||
}
|
||||
nextSkipDoc = skipper.getNextSkipDoc();
|
||||
|
|
|
@ -65,7 +65,8 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
|||
|
||||
// Increment version to change it:
|
||||
final static int VERSION_START = 0;
|
||||
final static int VERSION_CURRENT = VERSION_START;
|
||||
final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443
|
||||
final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA;
|
||||
|
||||
final IndexOutput docOut;
|
||||
final IndexOutput posOut;
|
||||
|
@ -101,7 +102,6 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
|||
private long lastBlockPosFP;
|
||||
private long lastBlockPayFP;
|
||||
private int lastBlockPosBufferUpto;
|
||||
private int lastBlockStartOffset;
|
||||
private int lastBlockPayloadByteUpto;
|
||||
|
||||
private int lastDocID;
|
||||
|
@ -232,7 +232,7 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
|||
// if (DEBUG) {
|
||||
// System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1));
|
||||
// }
|
||||
skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto);
|
||||
skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto);
|
||||
}
|
||||
|
||||
final int docDelta = docID - lastDocID;
|
||||
|
@ -337,7 +337,6 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
|||
}
|
||||
lastBlockPosFP = posOut.getFilePointer();
|
||||
lastBlockPosBufferUpto = posBufferUpto;
|
||||
lastBlockStartOffset = lastStartOffset;
|
||||
lastBlockPayloadByteUpto = payloadByteUpto;
|
||||
}
|
||||
// if (DEBUG) {
|
||||
|
|
|
@ -58,12 +58,10 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
|||
private long posPointer[];
|
||||
private long payPointer[];
|
||||
private int posBufferUpto[];
|
||||
private int startOffset[];
|
||||
private int payloadByteUpto[];
|
||||
|
||||
private long lastPosPointer;
|
||||
private long lastPayPointer;
|
||||
private int lastStartOffset;
|
||||
private int lastPayloadByteUpto;
|
||||
private long lastDocPointer;
|
||||
private int lastPosBufferUpto;
|
||||
|
@ -80,11 +78,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
|||
} else {
|
||||
payloadByteUpto = null;
|
||||
}
|
||||
if (hasOffsets) {
|
||||
startOffset = new int[maxSkipLevels];
|
||||
} else {
|
||||
startOffset = null;
|
||||
}
|
||||
if (hasOffsets || hasPayloads) {
|
||||
payPointer = new long[maxSkipLevels];
|
||||
} else {
|
||||
|
@ -143,10 +136,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
|||
return lastPayPointer;
|
||||
}
|
||||
|
||||
public int getStartOffset() {
|
||||
return lastStartOffset;
|
||||
}
|
||||
|
||||
public int getPayloadByteUpto() {
|
||||
return lastPayloadByteUpto;
|
||||
}
|
||||
|
@ -165,9 +154,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
|||
if (posPointer != null) {
|
||||
posPointer[level] = lastPosPointer;
|
||||
posBufferUpto[level] = lastPosBufferUpto;
|
||||
if (startOffset != null) {
|
||||
startOffset[level] = lastStartOffset;
|
||||
}
|
||||
if (payloadByteUpto != null) {
|
||||
payloadByteUpto[level] = lastPayloadByteUpto;
|
||||
}
|
||||
|
@ -194,9 +180,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
|||
if (payPointer != null) {
|
||||
lastPayPointer = payPointer[level];
|
||||
}
|
||||
if (startOffset != null) {
|
||||
lastStartOffset = startOffset[level];
|
||||
}
|
||||
if (payloadByteUpto != null) {
|
||||
lastPayloadByteUpto = payloadByteUpto[level];
|
||||
}
|
||||
|
@ -231,10 +214,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
|||
payloadByteUpto[level] = skipStream.readVInt();
|
||||
}
|
||||
|
||||
if (startOffset != null) {
|
||||
startOffset[level] += skipStream.readVInt();
|
||||
}
|
||||
|
||||
if (payPointer != null) {
|
||||
payPointer[level] += skipStream.readVInt();
|
||||
}
|
||||
|
|
|
@ -50,7 +50,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
|||
private long[] lastSkipDocPointer;
|
||||
private long[] lastSkipPosPointer;
|
||||
private long[] lastSkipPayPointer;
|
||||
private int[] lastStartOffset;
|
||||
private int[] lastPayloadByteUpto;
|
||||
|
||||
private final IndexOutput docOut;
|
||||
|
@ -62,7 +61,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
|||
private long curPosPointer;
|
||||
private long curPayPointer;
|
||||
private int curPosBufferUpto;
|
||||
private int curStartOffset;
|
||||
private int curPayloadByteUpto;
|
||||
private boolean fieldHasPositions;
|
||||
private boolean fieldHasOffsets;
|
||||
|
@ -81,7 +79,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
|||
if (payOut != null) {
|
||||
lastSkipPayPointer = new long[maxSkipLevels];
|
||||
}
|
||||
lastStartOffset = new int[maxSkipLevels];
|
||||
lastPayloadByteUpto = new int[maxSkipLevels];
|
||||
}
|
||||
}
|
||||
|
@ -99,9 +96,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
|||
Arrays.fill(lastSkipDocPointer, docOut.getFilePointer());
|
||||
if (fieldHasPositions) {
|
||||
Arrays.fill(lastSkipPosPointer, posOut.getFilePointer());
|
||||
if (fieldHasOffsets) {
|
||||
Arrays.fill(lastStartOffset, 0);
|
||||
}
|
||||
if (fieldHasPayloads) {
|
||||
Arrays.fill(lastPayloadByteUpto, 0);
|
||||
}
|
||||
|
@ -114,14 +108,13 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
|||
/**
|
||||
* Sets the values for the current skip data.
|
||||
*/
|
||||
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int startOffset, int payloadByteUpto) throws IOException {
|
||||
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
|
||||
this.curDoc = doc;
|
||||
this.curDocPointer = docOut.getFilePointer();
|
||||
this.curPosPointer = posFP;
|
||||
this.curPayPointer = payFP;
|
||||
this.curPosBufferUpto = posBufferUpto;
|
||||
this.curPayloadByteUpto = payloadByteUpto;
|
||||
this.curStartOffset = startOffset;
|
||||
bufferSkip(numDocs);
|
||||
}
|
||||
|
||||
|
@ -149,11 +142,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
|||
skipBuffer.writeVInt(curPayloadByteUpto);
|
||||
}
|
||||
|
||||
if (fieldHasOffsets) {
|
||||
skipBuffer.writeVInt(curStartOffset - lastStartOffset[level]);
|
||||
lastStartOffset[level] = curStartOffset;
|
||||
}
|
||||
|
||||
if (fieldHasOffsets || fieldHasPayloads) {
|
||||
skipBuffer.writeVInt((int) (curPayPointer - lastSkipPayPointer[level]));
|
||||
lastSkipPayPointer[level] = curPayPointer;
|
||||
|
|
Loading…
Reference in New Issue