LUCENE-4443: don't write unnecessary skipdata in BlockSkipWriter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391433 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-28 12:49:53 +00:00
parent 09720113f8
commit 014e6e0c94
6 changed files with 20 additions and 49 deletions

View File

@ -33,6 +33,12 @@ Bug Fixes
* LUCENE-4411: when sampling is enabled for a FacetRequest, its depth
parameter is reset to the default (1), even if set otherwise.
(Gilad Barkai via Shai Erera)
Optimizations
* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets
into the skipdata. You need to reindex any indexes created with
this experimental codec. (Robert Muir)
======================= Lucene 4.0.0 =======================

View File

@ -202,7 +202,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <sup>NumSkipLevels-1</sup>, SkipLevel&gt;, SkipDatum?</li>
* <li>SkipLevel --&gt; &lt;SkipDatum&gt; <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li>
* <li>SkipDatum --&gt; DocSkip, DocFPSkip, &lt;PosFPSkip, PosBlockOffset, PayLength?,
* OffsetStart?, PayFPSkip?&gt;?, SkipChildLevelPointer?</li>
* PayFPSkip?&gt;?, SkipChildLevelPointer?</li>
* <li>PackedDocDeltaBlock, PackedFreqBlock --&gt; {@link PackedInts PackedInts}</li>
* <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayLength, OffsetStart, PayFPSkip
* --&gt;
@ -250,7 +250,6 @@ import org.apache.lucene.util.packed.PackedInts;
* equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of
* current term's TermFreqs, and stored as a difference sequence.</li>
* <li>PayLength indicates the length of last payload.</li>
* <li>OffsetStart indicates the first value of last offset pair.</li>
* </ul>
* </dd>
* </dl>

View File

@ -72,8 +72,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
ioContext);
CodecUtil.checkHeader(docIn,
BlockPostingsWriter.DOC_CODEC,
BlockPostingsWriter.VERSION_START,
BlockPostingsWriter.VERSION_START);
BlockPostingsWriter.VERSION_CURRENT,
BlockPostingsWriter.VERSION_CURRENT);
forUtil = new ForUtil(docIn);
if (fieldInfos.hasProx()) {
@ -81,16 +81,16 @@ final class BlockPostingsReader extends PostingsReaderBase {
ioContext);
CodecUtil.checkHeader(posIn,
BlockPostingsWriter.POS_CODEC,
BlockPostingsWriter.VERSION_START,
BlockPostingsWriter.VERSION_START);
BlockPostingsWriter.VERSION_CURRENT,
BlockPostingsWriter.VERSION_CURRENT);
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION),
ioContext);
CodecUtil.checkHeader(payIn,
BlockPostingsWriter.PAY_CODEC,
BlockPostingsWriter.VERSION_START,
BlockPostingsWriter.VERSION_START);
BlockPostingsWriter.VERSION_CURRENT,
BlockPostingsWriter.VERSION_CURRENT);
}
}
@ -110,8 +110,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
// Make sure we are talking to the matching postings writer
CodecUtil.checkHeader(termsIn,
BlockPostingsWriter.TERMS_CODEC,
BlockPostingsWriter.VERSION_START,
BlockPostingsWriter.VERSION_START);
BlockPostingsWriter.VERSION_CURRENT,
BlockPostingsWriter.VERSION_CURRENT);
final int indexBlockSize = termsIn.readVInt();
if (indexBlockSize != BLOCK_SIZE) {
throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
@ -1314,7 +1314,7 @@ final class BlockPostingsReader extends PostingsReaderBase {
posPendingFP = skipper.getPosPointer();
payPendingFP = skipper.getPayPointer();
posPendingCount = skipper.getPosBufferUpto();
lastStartOffset = skipper.getStartOffset();
lastStartOffset = 0; // new document
payloadByteUpto = skipper.getPayloadByteUpto();
}
nextSkipDoc = skipper.getNextSkipDoc();

View File

@ -65,7 +65,8 @@ final class BlockPostingsWriter extends PostingsWriterBase {
// Increment version to change it:
final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START;
final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443
final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA;
final IndexOutput docOut;
final IndexOutput posOut;
@ -101,7 +102,6 @@ final class BlockPostingsWriter extends PostingsWriterBase {
private long lastBlockPosFP;
private long lastBlockPayFP;
private int lastBlockPosBufferUpto;
private int lastBlockStartOffset;
private int lastBlockPayloadByteUpto;
private int lastDocID;
@ -232,7 +232,7 @@ final class BlockPostingsWriter extends PostingsWriterBase {
// if (DEBUG) {
// System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1));
// }
skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto);
skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto);
}
final int docDelta = docID - lastDocID;
@ -337,7 +337,6 @@ final class BlockPostingsWriter extends PostingsWriterBase {
}
lastBlockPosFP = posOut.getFilePointer();
lastBlockPosBufferUpto = posBufferUpto;
lastBlockStartOffset = lastStartOffset;
lastBlockPayloadByteUpto = payloadByteUpto;
}
// if (DEBUG) {

View File

@ -58,12 +58,10 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
private long posPointer[];
private long payPointer[];
private int posBufferUpto[];
private int startOffset[];
private int payloadByteUpto[];
private long lastPosPointer;
private long lastPayPointer;
private int lastStartOffset;
private int lastPayloadByteUpto;
private long lastDocPointer;
private int lastPosBufferUpto;
@ -80,11 +78,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
} else {
payloadByteUpto = null;
}
if (hasOffsets) {
startOffset = new int[maxSkipLevels];
} else {
startOffset = null;
}
if (hasOffsets || hasPayloads) {
payPointer = new long[maxSkipLevels];
} else {
@ -143,10 +136,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
return lastPayPointer;
}
public int getStartOffset() {
return lastStartOffset;
}
public int getPayloadByteUpto() {
return lastPayloadByteUpto;
}
@ -165,9 +154,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
if (posPointer != null) {
posPointer[level] = lastPosPointer;
posBufferUpto[level] = lastPosBufferUpto;
if (startOffset != null) {
startOffset[level] = lastStartOffset;
}
if (payloadByteUpto != null) {
payloadByteUpto[level] = lastPayloadByteUpto;
}
@ -194,9 +180,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
if (payPointer != null) {
lastPayPointer = payPointer[level];
}
if (startOffset != null) {
lastStartOffset = startOffset[level];
}
if (payloadByteUpto != null) {
lastPayloadByteUpto = payloadByteUpto[level];
}
@ -231,10 +214,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
payloadByteUpto[level] = skipStream.readVInt();
}
if (startOffset != null) {
startOffset[level] += skipStream.readVInt();
}
if (payPointer != null) {
payPointer[level] += skipStream.readVInt();
}

View File

@ -50,7 +50,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
private long[] lastSkipDocPointer;
private long[] lastSkipPosPointer;
private long[] lastSkipPayPointer;
private int[] lastStartOffset;
private int[] lastPayloadByteUpto;
private final IndexOutput docOut;
@ -62,7 +61,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
private long curPosPointer;
private long curPayPointer;
private int curPosBufferUpto;
private int curStartOffset;
private int curPayloadByteUpto;
private boolean fieldHasPositions;
private boolean fieldHasOffsets;
@ -81,7 +79,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
if (payOut != null) {
lastSkipPayPointer = new long[maxSkipLevels];
}
lastStartOffset = new int[maxSkipLevels];
lastPayloadByteUpto = new int[maxSkipLevels];
}
}
@ -99,9 +96,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
Arrays.fill(lastSkipDocPointer, docOut.getFilePointer());
if (fieldHasPositions) {
Arrays.fill(lastSkipPosPointer, posOut.getFilePointer());
if (fieldHasOffsets) {
Arrays.fill(lastStartOffset, 0);
}
if (fieldHasPayloads) {
Arrays.fill(lastPayloadByteUpto, 0);
}
@ -114,14 +108,13 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
/**
* Sets the values for the current skip data.
*/
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int startOffset, int payloadByteUpto) throws IOException {
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
this.curDoc = doc;
this.curDocPointer = docOut.getFilePointer();
this.curPosPointer = posFP;
this.curPayPointer = payFP;
this.curPosBufferUpto = posBufferUpto;
this.curPayloadByteUpto = payloadByteUpto;
this.curStartOffset = startOffset;
bufferSkip(numDocs);
}
@ -149,11 +142,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
skipBuffer.writeVInt(curPayloadByteUpto);
}
if (fieldHasOffsets) {
skipBuffer.writeVInt(curStartOffset - lastStartOffset[level]);
lastStartOffset[level] = curStartOffset;
}
if (fieldHasOffsets || fieldHasPayloads) {
skipBuffer.writeVInt((int) (curPayPointer - lastSkipPayPointer[level]));
lastSkipPayPointer[level] = curPayPointer;