mirror of https://github.com/apache/lucene.git
LUCENE-4443: don't write unnecessary skipdata in BlockSkipWriter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391433 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
09720113f8
commit
014e6e0c94
|
@ -33,6 +33,12 @@ Bug Fixes
|
||||||
* LUCENE-4411: when sampling is enabled for a FacetRequest, its depth
|
* LUCENE-4411: when sampling is enabled for a FacetRequest, its depth
|
||||||
parameter is reset to the default (1), even if set otherwise.
|
parameter is reset to the default (1), even if set otherwise.
|
||||||
(Gilad Barkai via Shai Erera)
|
(Gilad Barkai via Shai Erera)
|
||||||
|
|
||||||
|
Optimizations
|
||||||
|
|
||||||
|
* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets
|
||||||
|
into the skipdata. You need to reindex any indexes created with
|
||||||
|
this experimental codec. (Robert Muir)
|
||||||
|
|
||||||
======================= Lucene 4.0.0 =======================
|
======================= Lucene 4.0.0 =======================
|
||||||
|
|
||||||
|
|
|
@ -202,7 +202,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* <sup>NumSkipLevels-1</sup>, SkipLevel>, SkipDatum?</li>
|
* <sup>NumSkipLevels-1</sup>, SkipLevel>, SkipDatum?</li>
|
||||||
* <li>SkipLevel --> <SkipDatum> <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li>
|
* <li>SkipLevel --> <SkipDatum> <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li>
|
||||||
* <li>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?,
|
* <li>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?,
|
||||||
* OffsetStart?, PayFPSkip?>?, SkipChildLevelPointer?</li>
|
* PayFPSkip?>?, SkipChildLevelPointer?</li>
|
||||||
* <li>PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}</li>
|
* <li>PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}</li>
|
||||||
* <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayLength, OffsetStart, PayFPSkip
|
* <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayLength, PayFPSkip
|
||||||
* -->
|
* -->
|
||||||
|
@ -250,7 +250,6 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of
|
* equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of
|
||||||
* current term's TermFreqs, and stored as a difference sequence.</li>
|
* current term's TermFreqs, and stored as a difference sequence.</li>
|
||||||
* <li>PayLength indicates the length of last payload.</li>
|
* <li>PayLength indicates the length of last payload.</li>
|
||||||
* <li>OffsetStart indicates the first value of last offset pair.</li>
|
|
||||||
* </ul>
|
* </ul>
|
||||||
* </dd>
|
* </dd>
|
||||||
* </dl>
|
* </dl>
|
||||||
|
|
|
@ -72,8 +72,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
ioContext);
|
ioContext);
|
||||||
CodecUtil.checkHeader(docIn,
|
CodecUtil.checkHeader(docIn,
|
||||||
BlockPostingsWriter.DOC_CODEC,
|
BlockPostingsWriter.DOC_CODEC,
|
||||||
BlockPostingsWriter.VERSION_START,
|
BlockPostingsWriter.VERSION_CURRENT,
|
||||||
BlockPostingsWriter.VERSION_START);
|
BlockPostingsWriter.VERSION_CURRENT);
|
||||||
forUtil = new ForUtil(docIn);
|
forUtil = new ForUtil(docIn);
|
||||||
|
|
||||||
if (fieldInfos.hasProx()) {
|
if (fieldInfos.hasProx()) {
|
||||||
|
@ -81,16 +81,16 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
ioContext);
|
ioContext);
|
||||||
CodecUtil.checkHeader(posIn,
|
CodecUtil.checkHeader(posIn,
|
||||||
BlockPostingsWriter.POS_CODEC,
|
BlockPostingsWriter.POS_CODEC,
|
||||||
BlockPostingsWriter.VERSION_START,
|
BlockPostingsWriter.VERSION_CURRENT,
|
||||||
BlockPostingsWriter.VERSION_START);
|
BlockPostingsWriter.VERSION_CURRENT);
|
||||||
|
|
||||||
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
|
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
|
||||||
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION),
|
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION),
|
||||||
ioContext);
|
ioContext);
|
||||||
CodecUtil.checkHeader(payIn,
|
CodecUtil.checkHeader(payIn,
|
||||||
BlockPostingsWriter.PAY_CODEC,
|
BlockPostingsWriter.PAY_CODEC,
|
||||||
BlockPostingsWriter.VERSION_START,
|
BlockPostingsWriter.VERSION_CURRENT,
|
||||||
BlockPostingsWriter.VERSION_START);
|
BlockPostingsWriter.VERSION_CURRENT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,8 +110,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
// Make sure we are talking to the matching postings writer
|
// Make sure we are talking to the matching postings writer
|
||||||
CodecUtil.checkHeader(termsIn,
|
CodecUtil.checkHeader(termsIn,
|
||||||
BlockPostingsWriter.TERMS_CODEC,
|
BlockPostingsWriter.TERMS_CODEC,
|
||||||
BlockPostingsWriter.VERSION_START,
|
BlockPostingsWriter.VERSION_CURRENT,
|
||||||
BlockPostingsWriter.VERSION_START);
|
BlockPostingsWriter.VERSION_CURRENT);
|
||||||
final int indexBlockSize = termsIn.readVInt();
|
final int indexBlockSize = termsIn.readVInt();
|
||||||
if (indexBlockSize != BLOCK_SIZE) {
|
if (indexBlockSize != BLOCK_SIZE) {
|
||||||
throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
|
throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
|
||||||
|
@ -1314,7 +1314,7 @@ final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
posPendingFP = skipper.getPosPointer();
|
posPendingFP = skipper.getPosPointer();
|
||||||
payPendingFP = skipper.getPayPointer();
|
payPendingFP = skipper.getPayPointer();
|
||||||
posPendingCount = skipper.getPosBufferUpto();
|
posPendingCount = skipper.getPosBufferUpto();
|
||||||
lastStartOffset = skipper.getStartOffset();
|
lastStartOffset = 0; // new document
|
||||||
payloadByteUpto = skipper.getPayloadByteUpto();
|
payloadByteUpto = skipper.getPayloadByteUpto();
|
||||||
}
|
}
|
||||||
nextSkipDoc = skipper.getNextSkipDoc();
|
nextSkipDoc = skipper.getNextSkipDoc();
|
||||||
|
|
|
@ -65,7 +65,8 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
||||||
|
|
||||||
// Increment version to change it:
|
// Increment version to change it:
|
||||||
final static int VERSION_START = 0;
|
final static int VERSION_START = 0;
|
||||||
final static int VERSION_CURRENT = VERSION_START;
|
final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443
|
||||||
|
final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA;
|
||||||
|
|
||||||
final IndexOutput docOut;
|
final IndexOutput docOut;
|
||||||
final IndexOutput posOut;
|
final IndexOutput posOut;
|
||||||
|
@ -101,7 +102,6 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
||||||
private long lastBlockPosFP;
|
private long lastBlockPosFP;
|
||||||
private long lastBlockPayFP;
|
private long lastBlockPayFP;
|
||||||
private int lastBlockPosBufferUpto;
|
private int lastBlockPosBufferUpto;
|
||||||
private int lastBlockStartOffset;
|
|
||||||
private int lastBlockPayloadByteUpto;
|
private int lastBlockPayloadByteUpto;
|
||||||
|
|
||||||
private int lastDocID;
|
private int lastDocID;
|
||||||
|
@ -232,7 +232,7 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1));
|
// System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1));
|
||||||
// }
|
// }
|
||||||
skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto);
|
skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto);
|
||||||
}
|
}
|
||||||
|
|
||||||
final int docDelta = docID - lastDocID;
|
final int docDelta = docID - lastDocID;
|
||||||
|
@ -337,7 +337,6 @@ final class BlockPostingsWriter extends PostingsWriterBase {
|
||||||
}
|
}
|
||||||
lastBlockPosFP = posOut.getFilePointer();
|
lastBlockPosFP = posOut.getFilePointer();
|
||||||
lastBlockPosBufferUpto = posBufferUpto;
|
lastBlockPosBufferUpto = posBufferUpto;
|
||||||
lastBlockStartOffset = lastStartOffset;
|
|
||||||
lastBlockPayloadByteUpto = payloadByteUpto;
|
lastBlockPayloadByteUpto = payloadByteUpto;
|
||||||
}
|
}
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
|
|
|
@ -58,12 +58,10 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
||||||
private long posPointer[];
|
private long posPointer[];
|
||||||
private long payPointer[];
|
private long payPointer[];
|
||||||
private int posBufferUpto[];
|
private int posBufferUpto[];
|
||||||
private int startOffset[];
|
|
||||||
private int payloadByteUpto[];
|
private int payloadByteUpto[];
|
||||||
|
|
||||||
private long lastPosPointer;
|
private long lastPosPointer;
|
||||||
private long lastPayPointer;
|
private long lastPayPointer;
|
||||||
private int lastStartOffset;
|
|
||||||
private int lastPayloadByteUpto;
|
private int lastPayloadByteUpto;
|
||||||
private long lastDocPointer;
|
private long lastDocPointer;
|
||||||
private int lastPosBufferUpto;
|
private int lastPosBufferUpto;
|
||||||
|
@ -80,11 +78,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
||||||
} else {
|
} else {
|
||||||
payloadByteUpto = null;
|
payloadByteUpto = null;
|
||||||
}
|
}
|
||||||
if (hasOffsets) {
|
|
||||||
startOffset = new int[maxSkipLevels];
|
|
||||||
} else {
|
|
||||||
startOffset = null;
|
|
||||||
}
|
|
||||||
if (hasOffsets || hasPayloads) {
|
if (hasOffsets || hasPayloads) {
|
||||||
payPointer = new long[maxSkipLevels];
|
payPointer = new long[maxSkipLevels];
|
||||||
} else {
|
} else {
|
||||||
|
@ -143,10 +136,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
||||||
return lastPayPointer;
|
return lastPayPointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getStartOffset() {
|
|
||||||
return lastStartOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getPayloadByteUpto() {
|
public int getPayloadByteUpto() {
|
||||||
return lastPayloadByteUpto;
|
return lastPayloadByteUpto;
|
||||||
}
|
}
|
||||||
|
@ -165,9 +154,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
||||||
if (posPointer != null) {
|
if (posPointer != null) {
|
||||||
posPointer[level] = lastPosPointer;
|
posPointer[level] = lastPosPointer;
|
||||||
posBufferUpto[level] = lastPosBufferUpto;
|
posBufferUpto[level] = lastPosBufferUpto;
|
||||||
if (startOffset != null) {
|
|
||||||
startOffset[level] = lastStartOffset;
|
|
||||||
}
|
|
||||||
if (payloadByteUpto != null) {
|
if (payloadByteUpto != null) {
|
||||||
payloadByteUpto[level] = lastPayloadByteUpto;
|
payloadByteUpto[level] = lastPayloadByteUpto;
|
||||||
}
|
}
|
||||||
|
@ -194,9 +180,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
||||||
if (payPointer != null) {
|
if (payPointer != null) {
|
||||||
lastPayPointer = payPointer[level];
|
lastPayPointer = payPointer[level];
|
||||||
}
|
}
|
||||||
if (startOffset != null) {
|
|
||||||
lastStartOffset = startOffset[level];
|
|
||||||
}
|
|
||||||
if (payloadByteUpto != null) {
|
if (payloadByteUpto != null) {
|
||||||
lastPayloadByteUpto = payloadByteUpto[level];
|
lastPayloadByteUpto = payloadByteUpto[level];
|
||||||
}
|
}
|
||||||
|
@ -231,10 +214,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader {
|
||||||
payloadByteUpto[level] = skipStream.readVInt();
|
payloadByteUpto[level] = skipStream.readVInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (startOffset != null) {
|
|
||||||
startOffset[level] += skipStream.readVInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (payPointer != null) {
|
if (payPointer != null) {
|
||||||
payPointer[level] += skipStream.readVInt();
|
payPointer[level] += skipStream.readVInt();
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
||||||
private long[] lastSkipDocPointer;
|
private long[] lastSkipDocPointer;
|
||||||
private long[] lastSkipPosPointer;
|
private long[] lastSkipPosPointer;
|
||||||
private long[] lastSkipPayPointer;
|
private long[] lastSkipPayPointer;
|
||||||
private int[] lastStartOffset;
|
|
||||||
private int[] lastPayloadByteUpto;
|
private int[] lastPayloadByteUpto;
|
||||||
|
|
||||||
private final IndexOutput docOut;
|
private final IndexOutput docOut;
|
||||||
|
@ -62,7 +61,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
||||||
private long curPosPointer;
|
private long curPosPointer;
|
||||||
private long curPayPointer;
|
private long curPayPointer;
|
||||||
private int curPosBufferUpto;
|
private int curPosBufferUpto;
|
||||||
private int curStartOffset;
|
|
||||||
private int curPayloadByteUpto;
|
private int curPayloadByteUpto;
|
||||||
private boolean fieldHasPositions;
|
private boolean fieldHasPositions;
|
||||||
private boolean fieldHasOffsets;
|
private boolean fieldHasOffsets;
|
||||||
|
@ -81,7 +79,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
||||||
if (payOut != null) {
|
if (payOut != null) {
|
||||||
lastSkipPayPointer = new long[maxSkipLevels];
|
lastSkipPayPointer = new long[maxSkipLevels];
|
||||||
}
|
}
|
||||||
lastStartOffset = new int[maxSkipLevels];
|
|
||||||
lastPayloadByteUpto = new int[maxSkipLevels];
|
lastPayloadByteUpto = new int[maxSkipLevels];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -99,9 +96,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
||||||
Arrays.fill(lastSkipDocPointer, docOut.getFilePointer());
|
Arrays.fill(lastSkipDocPointer, docOut.getFilePointer());
|
||||||
if (fieldHasPositions) {
|
if (fieldHasPositions) {
|
||||||
Arrays.fill(lastSkipPosPointer, posOut.getFilePointer());
|
Arrays.fill(lastSkipPosPointer, posOut.getFilePointer());
|
||||||
if (fieldHasOffsets) {
|
|
||||||
Arrays.fill(lastStartOffset, 0);
|
|
||||||
}
|
|
||||||
if (fieldHasPayloads) {
|
if (fieldHasPayloads) {
|
||||||
Arrays.fill(lastPayloadByteUpto, 0);
|
Arrays.fill(lastPayloadByteUpto, 0);
|
||||||
}
|
}
|
||||||
|
@ -114,14 +108,13 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
||||||
/**
|
/**
|
||||||
* Sets the values for the current skip data.
|
* Sets the values for the current skip data.
|
||||||
*/
|
*/
|
||||||
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int startOffset, int payloadByteUpto) throws IOException {
|
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
|
||||||
this.curDoc = doc;
|
this.curDoc = doc;
|
||||||
this.curDocPointer = docOut.getFilePointer();
|
this.curDocPointer = docOut.getFilePointer();
|
||||||
this.curPosPointer = posFP;
|
this.curPosPointer = posFP;
|
||||||
this.curPayPointer = payFP;
|
this.curPayPointer = payFP;
|
||||||
this.curPosBufferUpto = posBufferUpto;
|
this.curPosBufferUpto = posBufferUpto;
|
||||||
this.curPayloadByteUpto = payloadByteUpto;
|
this.curPayloadByteUpto = payloadByteUpto;
|
||||||
this.curStartOffset = startOffset;
|
|
||||||
bufferSkip(numDocs);
|
bufferSkip(numDocs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,11 +142,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
|
||||||
skipBuffer.writeVInt(curPayloadByteUpto);
|
skipBuffer.writeVInt(curPayloadByteUpto);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fieldHasOffsets) {
|
|
||||||
skipBuffer.writeVInt(curStartOffset - lastStartOffset[level]);
|
|
||||||
lastStartOffset[level] = curStartOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fieldHasOffsets || fieldHasPayloads) {
|
if (fieldHasOffsets || fieldHasPayloads) {
|
||||||
skipBuffer.writeVInt((int) (curPayPointer - lastSkipPayPointer[level]));
|
skipBuffer.writeVInt((int) (curPayPointer - lastSkipPayPointer[level]));
|
||||||
lastSkipPayPointer[level] = curPayPointer;
|
lastSkipPayPointer[level] = curPayPointer;
|
||||||
|
|
Loading…
Reference in New Issue