From 014e6e0c946ab68674ddb6bfcc5d6ccdcebfa522 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 28 Sep 2012 12:49:53 +0000 Subject: [PATCH] LUCENE-4443: don't write unnecessary skipdata in BlockSkipWriter git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391433 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 6 ++++++ .../codecs/block/BlockPostingsFormat.java | 3 +-- .../codecs/block/BlockPostingsReader.java | 18 ++++++++-------- .../codecs/block/BlockPostingsWriter.java | 7 +++---- .../lucene/codecs/block/BlockSkipReader.java | 21 ------------------- .../lucene/codecs/block/BlockSkipWriter.java | 14 +------------ 6 files changed, 20 insertions(+), 49 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 910ab8965e5..fbce028bb93 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -33,6 +33,12 @@ Bug Fixes * LUCENE-4411: when sampling is enabled for a FacetRequest, its depth parameter is reset to the default (1), even if set otherwise. (Gilad Barkai via Shai Erera) + +Optimizations + +* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets + into the skipdata. You need to reindex any indexes created with + this experimental codec. (Robert Muir) ======================= Lucene 4.0.0 ======================= diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java index b9b22ba5427..b26d0c65746 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java @@ -202,7 +202,7 @@ import org.apache.lucene.util.packed.PackedInts; * NumSkipLevels-1, SkipLevel>, SkipDatum? *
  • SkipLevel --> <SkipDatum> TrimmedDocFreq/(PackedBlockSize^(Level + 1))
  • *
  • SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?, - * OffsetStart?, PayFPSkip?>?, SkipChildLevelPointer?
  • + * PayFPSkip?>?, SkipChildLevelPointer? *
  • PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}
  • *
  • DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayLength, OffsetStart, PayFPSkip * --> @@ -250,7 +250,6 @@ import org.apache.lucene.util.packed.PackedInts; * equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of * current term's TermFreqs, and stored as a difference sequence.
  • *
  • PayLength indicates the length of last payload.
  • - *
  • OffsetStart indicates the first value of last offset pair.
  • * * * diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java index 100fae2978a..c9132babb6e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java @@ -72,8 +72,8 @@ final class BlockPostingsReader extends PostingsReaderBase { ioContext); CodecUtil.checkHeader(docIn, BlockPostingsWriter.DOC_CODEC, - BlockPostingsWriter.VERSION_START, - BlockPostingsWriter.VERSION_START); + BlockPostingsWriter.VERSION_CURRENT, + BlockPostingsWriter.VERSION_CURRENT); forUtil = new ForUtil(docIn); if (fieldInfos.hasProx()) { @@ -81,16 +81,16 @@ final class BlockPostingsReader extends PostingsReaderBase { ioContext); CodecUtil.checkHeader(posIn, BlockPostingsWriter.POS_CODEC, - BlockPostingsWriter.VERSION_START, - BlockPostingsWriter.VERSION_START); + BlockPostingsWriter.VERSION_CURRENT, + BlockPostingsWriter.VERSION_CURRENT); if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) { payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION), ioContext); CodecUtil.checkHeader(payIn, BlockPostingsWriter.PAY_CODEC, - BlockPostingsWriter.VERSION_START, - BlockPostingsWriter.VERSION_START); + BlockPostingsWriter.VERSION_CURRENT, + BlockPostingsWriter.VERSION_CURRENT); } } @@ -110,8 +110,8 @@ final class BlockPostingsReader extends PostingsReaderBase { // Make sure we are talking to the matching postings writer CodecUtil.checkHeader(termsIn, BlockPostingsWriter.TERMS_CODEC, - BlockPostingsWriter.VERSION_START, - BlockPostingsWriter.VERSION_START); + BlockPostingsWriter.VERSION_CURRENT, + BlockPostingsWriter.VERSION_CURRENT); final int indexBlockSize = termsIn.readVInt(); if (indexBlockSize != BLOCK_SIZE) { throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")"); @@ -1314,7 +1314,7 @@ final class BlockPostingsReader extends PostingsReaderBase { posPendingFP = skipper.getPosPointer(); payPendingFP = skipper.getPayPointer(); posPendingCount = skipper.getPosBufferUpto(); - lastStartOffset = skipper.getStartOffset(); + lastStartOffset = 0; // new document payloadByteUpto = skipper.getPayloadByteUpto(); } nextSkipDoc = skipper.getNextSkipDoc(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java index 70ca7ef9966..d07547e1eb1 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java @@ -65,7 +65,8 @@ final class BlockPostingsWriter extends PostingsWriterBase { // Increment version to change it: final static int VERSION_START = 0; - final static int VERSION_CURRENT = VERSION_START; + final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443 + final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA; final IndexOutput docOut; final IndexOutput posOut; @@ -101,7 +102,6 @@ final class BlockPostingsWriter extends PostingsWriterBase { private long lastBlockPosFP; private long lastBlockPayFP; private int lastBlockPosBufferUpto; - private int lastBlockStartOffset; private int lastBlockPayloadByteUpto; private int lastDocID; @@ -232,7 +232,7 @@ final class BlockPostingsWriter extends PostingsWriterBase { // if (DEBUG) { // System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-1)); // } - skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto); + skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto); } final int docDelta = docID - lastDocID; @@ -337,7 +337,6 @@ final class BlockPostingsWriter extends PostingsWriterBase { } lastBlockPosFP = posOut.getFilePointer(); lastBlockPosBufferUpto = posBufferUpto; - lastBlockStartOffset = lastStartOffset; lastBlockPayloadByteUpto = payloadByteUpto; } // if (DEBUG) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java index 169219c4fd1..e5803fd9696 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java @@ -58,12 +58,10 @@ final class BlockSkipReader extends MultiLevelSkipListReader { private long posPointer[]; private long payPointer[]; private int posBufferUpto[]; - private int startOffset[]; private int payloadByteUpto[]; private long lastPosPointer; private long lastPayPointer; - private int lastStartOffset; private int lastPayloadByteUpto; private long lastDocPointer; private int lastPosBufferUpto; @@ -80,11 +78,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader { } else { payloadByteUpto = null; } - if (hasOffsets) { - startOffset = new int[maxSkipLevels]; - } else { - startOffset = null; - } if (hasOffsets || hasPayloads) { payPointer = new long[maxSkipLevels]; } else { @@ -143,10 +136,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader { return lastPayPointer; } - public int getStartOffset() { - return lastStartOffset; - } - public int getPayloadByteUpto() { return lastPayloadByteUpto; } @@ -165,9 +154,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader { if (posPointer != null) { posPointer[level] = lastPosPointer; posBufferUpto[level] = lastPosBufferUpto; - if (startOffset != null) { - startOffset[level] = lastStartOffset; - } if (payloadByteUpto != null) { payloadByteUpto[level] = lastPayloadByteUpto; } @@ -194,9 +180,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader { if (payPointer != null) { lastPayPointer = payPointer[level]; } - if (startOffset != null) { - lastStartOffset = startOffset[level]; - } if (payloadByteUpto != null) { lastPayloadByteUpto = payloadByteUpto[level]; } @@ -231,10 +214,6 @@ final class BlockSkipReader extends MultiLevelSkipListReader { payloadByteUpto[level] = skipStream.readVInt(); } - if (startOffset != null) { - startOffset[level] += skipStream.readVInt(); - } - if (payPointer != null) { payPointer[level] += skipStream.readVInt(); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java index 8ece562ab5e..409930c6fed 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java @@ -50,7 +50,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter { private long[] lastSkipDocPointer; private long[] lastSkipPosPointer; private long[] lastSkipPayPointer; - private int[] lastStartOffset; private int[] lastPayloadByteUpto; private final IndexOutput docOut; @@ -62,7 +61,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter { private long curPosPointer; private long curPayPointer; private int curPosBufferUpto; - private int curStartOffset; private int curPayloadByteUpto; private boolean fieldHasPositions; private boolean fieldHasOffsets; @@ -81,7 +79,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter { if (payOut != null) { lastSkipPayPointer = new long[maxSkipLevels]; } - lastStartOffset = new int[maxSkipLevels]; lastPayloadByteUpto = new int[maxSkipLevels]; } } @@ -99,9 +96,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter { Arrays.fill(lastSkipDocPointer, docOut.getFilePointer()); if (fieldHasPositions) { Arrays.fill(lastSkipPosPointer, posOut.getFilePointer()); - if (fieldHasOffsets) { - Arrays.fill(lastStartOffset, 0); - } if (fieldHasPayloads) { Arrays.fill(lastPayloadByteUpto, 0); } @@ -114,14 +108,13 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter { /** * Sets the values for the current skip data. */ - public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int startOffset, int payloadByteUpto) throws IOException { + public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException { this.curDoc = doc; this.curDocPointer = docOut.getFilePointer(); this.curPosPointer = posFP; this.curPayPointer = payFP; this.curPosBufferUpto = posBufferUpto; this.curPayloadByteUpto = payloadByteUpto; - this.curStartOffset = startOffset; bufferSkip(numDocs); } @@ -149,11 +142,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter { skipBuffer.writeVInt(curPayloadByteUpto); } - if (fieldHasOffsets) { - skipBuffer.writeVInt(curStartOffset - lastStartOffset[level]); - lastStartOffset[level] = curStartOffset; - } - if (fieldHasOffsets || fieldHasPayloads) { skipBuffer.writeVInt((int) (curPayPointer - lastSkipPayPointer[level])); lastSkipPayPointer[level] = curPayPointer;