LUCENE-4443: don't write unnecessary skipdata in BlockSkipWriter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391433 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-28 12:49:53 +00:00
parent 09720113f8
commit 014e6e0c94
6 changed files with 20 additions and 49 deletions

View File

@ -33,6 +33,12 @@ Bug Fixes
* LUCENE-4411: when sampling is enabled for a FacetRequest, its depth * LUCENE-4411: when sampling is enabled for a FacetRequest, its depth
parameter is reset to the default (1), even if set otherwise. parameter is reset to the default (1), even if set otherwise.
(Gilad Barkai via Shai Erera) (Gilad Barkai via Shai Erera)
Optimizations
* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets
into the skipdata. You need to reindex any indexes created with
this experimental codec. (Robert Muir)
======================= Lucene 4.0.0 ======================= ======================= Lucene 4.0.0 =======================

View File

@ -202,7 +202,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <sup>NumSkipLevels-1</sup>, SkipLevel&gt;, SkipDatum?</li> * <sup>NumSkipLevels-1</sup>, SkipLevel&gt;, SkipDatum?</li>
* <li>SkipLevel --&gt; &lt;SkipDatum&gt; <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li> * <li>SkipLevel --&gt; &lt;SkipDatum&gt; <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li>
* <li>SkipDatum --&gt; DocSkip, DocFPSkip, &lt;PosFPSkip, PosBlockOffset, PayLength?, * <li>SkipDatum --&gt; DocSkip, DocFPSkip, &lt;PosFPSkip, PosBlockOffset, PayLength?,
* OffsetStart?, PayFPSkip?&gt;?, SkipChildLevelPointer?</li> * PayFPSkip?&gt;?, SkipChildLevelPointer?</li>
* <li>PackedDocDeltaBlock, PackedFreqBlock --&gt; {@link PackedInts PackedInts}</li> * <li>PackedDocDeltaBlock, PackedFreqBlock --&gt; {@link PackedInts PackedInts}</li>
* <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayLength, OffsetStart, PayFPSkip * <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayLength, OffsetStart, PayFPSkip
* --&gt; * --&gt;
@ -250,7 +250,6 @@ import org.apache.lucene.util.packed.PackedInts;
* equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of * equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of
* current term's TermFreqs, and stored as a difference sequence.</li> * current term's TermFreqs, and stored as a difference sequence.</li>
* <li>PayLength indicates the length of last payload.</li> * <li>PayLength indicates the length of last payload.</li>
* <li>OffsetStart indicates the first value of last offset pair.</li>
* </ul> * </ul>
* </dd> * </dd>
* </dl> * </dl>

View File

@ -72,8 +72,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
ioContext); ioContext);
CodecUtil.checkHeader(docIn, CodecUtil.checkHeader(docIn,
BlockPostingsWriter.DOC_CODEC, BlockPostingsWriter.DOC_CODEC,
BlockPostingsWriter.VERSION_START, BlockPostingsWriter.VERSION_CURRENT,
BlockPostingsWriter.VERSION_START); BlockPostingsWriter.VERSION_CURRENT);
forUtil = new ForUtil(docIn); forUtil = new ForUtil(docIn);
if (fieldInfos.hasProx()) { if (fieldInfos.hasProx()) {
@ -81,16 +81,16 @@ final class BlockPostingsReader extends PostingsReaderBase {
ioContext); ioContext);
CodecUtil.checkHeader(posIn, CodecUtil.checkHeader(posIn,
BlockPostingsWriter.POS_CODEC, BlockPostingsWriter.POS_CODEC,
BlockPostingsWriter.VERSION_START, BlockPostingsWriter.VERSION_CURRENT,
BlockPostingsWriter.VERSION_START); BlockPostingsWriter.VERSION_CURRENT);
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) { if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION), payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION),
ioContext); ioContext);
CodecUtil.checkHeader(payIn, CodecUtil.checkHeader(payIn,
BlockPostingsWriter.PAY_CODEC, BlockPostingsWriter.PAY_CODEC,
BlockPostingsWriter.VERSION_START, BlockPostingsWriter.VERSION_CURRENT,
BlockPostingsWriter.VERSION_START); BlockPostingsWriter.VERSION_CURRENT);
} }
} }
@ -110,8 +110,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
// Make sure we are talking to the matching postings writer // Make sure we are talking to the matching postings writer
CodecUtil.checkHeader(termsIn, CodecUtil.checkHeader(termsIn,
BlockPostingsWriter.TERMS_CODEC, BlockPostingsWriter.TERMS_CODEC,
BlockPostingsWriter.VERSION_START, BlockPostingsWriter.VERSION_CURRENT,
BlockPostingsWriter.VERSION_START); BlockPostingsWriter.VERSION_CURRENT);
final int indexBlockSize = termsIn.readVInt(); final int indexBlockSize = termsIn.readVInt();
if (indexBlockSize != BLOCK_SIZE) { if (indexBlockSize != BLOCK_SIZE) {
throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")"); throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
@ -1314,7 +1314,7 @@ final class BlockPostingsReader extends PostingsReaderBase {
posPendingFP = skipper.getPosPointer(); posPendingFP = skipper.getPosPointer();
payPendingFP = skipper.getPayPointer(); payPendingFP = skipper.getPayPointer();
posPendingCount = skipper.getPosBufferUpto(); posPendingCount = skipper.getPosBufferUpto();
lastStartOffset = skipper.getStartOffset(); lastStartOffset = 0; // new document
payloadByteUpto = skipper.getPayloadByteUpto(); payloadByteUpto = skipper.getPayloadByteUpto();
} }
nextSkipDoc = skipper.getNextSkipDoc(); nextSkipDoc = skipper.getNextSkipDoc();

View File

@ -65,7 +65,8 @@ final class BlockPostingsWriter extends PostingsWriterBase {
// Increment version to change it: // Increment version to change it:
final static int VERSION_START = 0; final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START; final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443
final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA;
final IndexOutput docOut; final IndexOutput docOut;
final IndexOutput posOut; final IndexOutput posOut;
@ -101,7 +102,6 @@ final class BlockPostingsWriter extends PostingsWriterBase {
private long lastBlockPosFP; private long lastBlockPosFP;
private long lastBlockPayFP; private long lastBlockPayFP;
private int lastBlockPosBufferUpto; private int lastBlockPosBufferUpto;
private int lastBlockStartOffset;
private int lastBlockPayloadByteUpto; private int lastBlockPayloadByteUpto;
private int lastDocID; private int lastDocID;
@ -232,7 +232,7 @@ final class BlockPostingsWriter extends PostingsWriterBase {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1)); // System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1));
// } // }
skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto); skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto);
} }
final int docDelta = docID - lastDocID; final int docDelta = docID - lastDocID;
@ -337,7 +337,6 @@ final class BlockPostingsWriter extends PostingsWriterBase {
} }
lastBlockPosFP = posOut.getFilePointer(); lastBlockPosFP = posOut.getFilePointer();
lastBlockPosBufferUpto = posBufferUpto; lastBlockPosBufferUpto = posBufferUpto;
lastBlockStartOffset = lastStartOffset;
lastBlockPayloadByteUpto = payloadByteUpto; lastBlockPayloadByteUpto = payloadByteUpto;
} }
// if (DEBUG) { // if (DEBUG) {

View File

@ -58,12 +58,10 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
private long posPointer[]; private long posPointer[];
private long payPointer[]; private long payPointer[];
private int posBufferUpto[]; private int posBufferUpto[];
private int startOffset[];
private int payloadByteUpto[]; private int payloadByteUpto[];
private long lastPosPointer; private long lastPosPointer;
private long lastPayPointer; private long lastPayPointer;
private int lastStartOffset;
private int lastPayloadByteUpto; private int lastPayloadByteUpto;
private long lastDocPointer; private long lastDocPointer;
private int lastPosBufferUpto; private int lastPosBufferUpto;
@ -80,11 +78,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
} else { } else {
payloadByteUpto = null; payloadByteUpto = null;
} }
if (hasOffsets) {
startOffset = new int[maxSkipLevels];
} else {
startOffset = null;
}
if (hasOffsets || hasPayloads) { if (hasOffsets || hasPayloads) {
payPointer = new long[maxSkipLevels]; payPointer = new long[maxSkipLevels];
} else { } else {
@ -143,10 +136,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
return lastPayPointer; return lastPayPointer;
} }
public int getStartOffset() {
return lastStartOffset;
}
public int getPayloadByteUpto() { public int getPayloadByteUpto() {
return lastPayloadByteUpto; return lastPayloadByteUpto;
} }
@ -165,9 +154,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
if (posPointer != null) { if (posPointer != null) {
posPointer[level] = lastPosPointer; posPointer[level] = lastPosPointer;
posBufferUpto[level] = lastPosBufferUpto; posBufferUpto[level] = lastPosBufferUpto;
if (startOffset != null) {
startOffset[level] = lastStartOffset;
}
if (payloadByteUpto != null) { if (payloadByteUpto != null) {
payloadByteUpto[level] = lastPayloadByteUpto; payloadByteUpto[level] = lastPayloadByteUpto;
} }
@ -194,9 +180,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
if (payPointer != null) { if (payPointer != null) {
lastPayPointer = payPointer[level]; lastPayPointer = payPointer[level];
} }
if (startOffset != null) {
lastStartOffset = startOffset[level];
}
if (payloadByteUpto != null) { if (payloadByteUpto != null) {
lastPayloadByteUpto = payloadByteUpto[level]; lastPayloadByteUpto = payloadByteUpto[level];
} }
@ -231,10 +214,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
payloadByteUpto[level] = skipStream.readVInt(); payloadByteUpto[level] = skipStream.readVInt();
} }
if (startOffset != null) {
startOffset[level] += skipStream.readVInt();
}
if (payPointer != null) { if (payPointer != null) {
payPointer[level] += skipStream.readVInt(); payPointer[level] += skipStream.readVInt();
} }

View File

@ -50,7 +50,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
private long[] lastSkipDocPointer; private long[] lastSkipDocPointer;
private long[] lastSkipPosPointer; private long[] lastSkipPosPointer;
private long[] lastSkipPayPointer; private long[] lastSkipPayPointer;
private int[] lastStartOffset;
private int[] lastPayloadByteUpto; private int[] lastPayloadByteUpto;
private final IndexOutput docOut; private final IndexOutput docOut;
@ -62,7 +61,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
private long curPosPointer; private long curPosPointer;
private long curPayPointer; private long curPayPointer;
private int curPosBufferUpto; private int curPosBufferUpto;
private int curStartOffset;
private int curPayloadByteUpto; private int curPayloadByteUpto;
private boolean fieldHasPositions; private boolean fieldHasPositions;
private boolean fieldHasOffsets; private boolean fieldHasOffsets;
@ -81,7 +79,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
if (payOut != null) { if (payOut != null) {
lastSkipPayPointer = new long[maxSkipLevels]; lastSkipPayPointer = new long[maxSkipLevels];
} }
lastStartOffset = new int[maxSkipLevels];
lastPayloadByteUpto = new int[maxSkipLevels]; lastPayloadByteUpto = new int[maxSkipLevels];
} }
} }
@ -99,9 +96,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
Arrays.fill(lastSkipDocPointer, docOut.getFilePointer()); Arrays.fill(lastSkipDocPointer, docOut.getFilePointer());
if (fieldHasPositions) { if (fieldHasPositions) {
Arrays.fill(lastSkipPosPointer, posOut.getFilePointer()); Arrays.fill(lastSkipPosPointer, posOut.getFilePointer());
if (fieldHasOffsets) {
Arrays.fill(lastStartOffset, 0);
}
if (fieldHasPayloads) { if (fieldHasPayloads) {
Arrays.fill(lastPayloadByteUpto, 0); Arrays.fill(lastPayloadByteUpto, 0);
} }
@ -114,14 +108,13 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
/** /**
* Sets the values for the current skip data. * Sets the values for the current skip data.
*/ */
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int startOffset, int payloadByteUpto) throws IOException { public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
this.curDoc = doc; this.curDoc = doc;
this.curDocPointer = docOut.getFilePointer(); this.curDocPointer = docOut.getFilePointer();
this.curPosPointer = posFP; this.curPosPointer = posFP;
this.curPayPointer = payFP; this.curPayPointer = payFP;
this.curPosBufferUpto = posBufferUpto; this.curPosBufferUpto = posBufferUpto;
this.curPayloadByteUpto = payloadByteUpto; this.curPayloadByteUpto = payloadByteUpto;
this.curStartOffset = startOffset;
bufferSkip(numDocs); bufferSkip(numDocs);
} }
@ -149,11 +142,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
skipBuffer.writeVInt(curPayloadByteUpto); skipBuffer.writeVInt(curPayloadByteUpto);
} }
if (fieldHasOffsets) {
skipBuffer.writeVInt(curStartOffset - lastStartOffset[level]);
lastStartOffset[level] = curStartOffset;
}
if (fieldHasOffsets || fieldHasPayloads) { if (fieldHasOffsets || fieldHasPayloads) {
skipBuffer.writeVInt((int) (curPayPointer - lastSkipPayPointer[level])); skipBuffer.writeVInt((int) (curPayPointer - lastSkipPayPointer[level]));
lastSkipPayPointer[level] = curPayPointer; lastSkipPayPointer[level] = curPayPointer;