From 328def8b97325be50aa5ae2cc4e30711b7b57fee Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Sat, 18 Aug 2012 17:59:17 +0000
Subject: [PATCH] LUCENE-3892: javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1374620 13f79535-47bb-0310-9956-ffa450edef68
---
.../codecs/block/BlockPostingsFormat.java | 43 ++++++++++++++-----
.../codecs/block/BlockPostingsReader.java | 4 ++
.../codecs/block/BlockPostingsWriter.java | 4 ++
3 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java
index c6a08d798c7..a14a630f4e1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java
@@ -129,14 +129,14 @@ import org.apache.lucene.util.packed.PackedInts;
* <PosFPDelta, PosVIntBlockFPDelta?, PayFPDelta?>?,
* SkipFPDelta?>EntryCount
* FieldSummary --> NumFields, <FieldNumber, NumTerms, RootCodeLength,
- * ByteRootCodeLength, SumDocFreq, DocCount>
+ * {@link DataOutput#writeByte byte}RootCodeLength, SumDocFreq, DocCount>
* NumFields
* Header, PostingsHeader --> {@link CodecUtil#writeHeader CodecHeader}
* DirOffset --> {@link DataOutput#writeLong Uint64}
* PackedBlockSize, EntryCount, SuffixLength, StatsLength, DocFreq, MetaLength,
- * PosVIntBlockFPDelta , SkipFPDelta, NumFields, FieldNumber, RootCodeLength, DocCount -->
+ * PosVIntBlockFPDelta, SkipFPDelta, NumFields, FieldNumber, RootCodeLength, DocCount -->
* {@link DataOutput#writeVInt VInt}
- * TotalTermFreq, DocFPDelta, PosFPDelta, NumTerms, SumTotalTermFreq, SumDocFreq -->
+ * TotalTermFreq, DocFPDelta, PosFPDelta, PayFPDelta, NumTerms, SumTotalTermFreq, SumDocFreq -->
* {@link DataOutput#writeVLong VLong}
*
* Notes:
@@ -203,7 +203,7 @@ import org.apache.lucene.util.packed.PackedInts;
* PackedBlock --> PackedDocDeltaBlock, PackedFreqBlock?
* VIntBlock --> <DocDelta[, Freq?]>DocFreq-PackedBlockSize*PackedDocBlockNum
* SkipData --> <<SkipLevelLength, SkipLevel>
- * NumSkipLevels-1, SkipLevel> <SkipDatum?>
+ * NumSkipLevels-1, SkipLevel>, SkipDatum?
* SkipLevel --> <SkipDatum> TrimmedDocFreq/(PackedBlockSize^(Level + 1))
* SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?,
* OffsetStart?, PayFPSkip?>?, SkipChildLevelPointer?
@@ -267,11 +267,11 @@ import org.apache.lucene.util.packed.PackedInts;
* The .pos file contains the lists of positions that each term occurs at within documents. It also
* sometimes stores part of payloads and offsets for speedup.
*
- * - Pos(.pos) --> Header, <TermPositions> TermCount
+ * - PosFile(.pos) --> Header, <TermPositions> TermCount
* - Header --> {@link CodecUtil#writeHeader CodecHeader}
* - TermPositions --> <PackedPosDeltaBlock> PackedPosBlockNum,
* VIntBlock?
- * - VIntBlock --> PosVIntCount <PosDelta[, PayLength?], PayData?,
+ *
- VIntBlock --> PosVIntCount, <PosDelta[, PayLength?], PayData?,
* OffsetStartDelta?, OffsetLength?>PosVIntCount
*
- PackedPosDeltaBlock --> {@link PackedInts PackedInts}
* - PosVIntCount, PosDelta, OffsetStartDelta, OffsetLength -->
@@ -283,7 +283,9 @@ import org.apache.lucene.util.packed.PackedInts;
*
- TermPositions are order by term (terms are implicit, from the term dictionary), and position
* values for each term document pair are incremental, and ordered by document number.
* - PackedPosBlockNum is the number of packed blocks for current term's positions, payloads or offsets.
- * In particular, PackedDocBlockNum = floor(totalTermFreq/PackedBlockSize)
+ * In particular, PackedPosBlockNum = floor(totalTermFreq/PackedBlockSize)
+ * - PosVIntCount is the number of positions encoded in VInt format. In particular,
+ * PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize
* - The procedure how PackedPosDeltaBlock is generated is the same as PackedDocDeltaBlock
* in chapter Frequencies and Skip Data.
* - PosDelta is the same as the format mentioned in
@@ -302,12 +304,13 @@ import org.apache.lucene.util.packed.PackedInts;
*
* -
* Payloads and Offsets
- *
The .pay file will store payload and offset associated with certain term-document positons.
+ *
The .pay file will store payloads and offsets associated with certain term-document positions.
* Some payloads and offsets will be seperated out into .pos file, for speedup reason.
*
* - PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> TermCount
* - Header --> {@link CodecUtil#writeHeader CodecHeader}
- * - TermPayloads --> <PackedPayLengthBlock, PayBlockLength, PayData, PackedOffsetStartDeltaBlock?, PackedOffsetLengthBlock?> PackedPayBlockNum
+ *
- TermPayloads --> <PackedPayLengthBlock, PayBlockLength, PayData> PackedPayBlockNum
+ *
- TermOffsets --> <PackedOffsetStartDeltaBlock?, PackedOffsetLengthBlock?> PackedPayBlockNum
*
- PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> {@link PackedInts PackedInts}
* - PayBlockLength --> {@link DataOutput#writeVInt VInt}
* - PayData --> {@link DataOutput#writeByte byte}PayBlockLength
@@ -319,11 +322,13 @@ import org.apache.lucene.util.packed.PackedInts;
* - The procedure how PackedPayLengthBlock and PackedOffsetLengthBlock are generated is the
* same as PackedFreqBlock in chapter Frequencies and Skip Data.
* While PackedStartDeltaBlock follows a same procedure as PackedDocDeltaBlock.
+ * - PackedPayBlockNum is always equal to PackedPosBlockNum, for the same term. It is also a synonym
+ * for PackedOffsetBlockNum.
* - PayBlockLength is the total length of payloads written within one block, should be the sum
* of PayLengths in one packed block.
* - PayLength in PackedPayLengthBlock is the length of each payload, associated with current
* position.
- *
+ *
*
*
*
@@ -331,13 +336,31 @@ import org.apache.lucene.util.packed.PackedInts;
*/
public final class BlockPostingsFormat extends PostingsFormat {
+ /**
+ * Filename extension for document numbers, frequencies, and skip data.
+ * See chapter: Frequencies and Skip Data
+ */
public static final String DOC_EXTENSION = "doc";
+
+ /**
+ * Filename extension for positions.
+ * See chapter: Positions
+ */
public static final String POS_EXTENSION = "pos";
+
+ /**
+ * Filename extension for payloads and offsets.
+ * See chapter: Payloads and Offsets
+ */
public static final String PAY_EXTENSION = "pay";
private final int minTermBlockSize;
private final int maxTermBlockSize;
+ /**
+ * Fixed packed block size, number of integers encoded in
+ * a single packed block.
+ */
// NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding
public final static int BLOCK_SIZE = 128;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java
index c5e043cccf3..f8b352b91db 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java
@@ -502,6 +502,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
skipped = true;
}
+ // always add one to correct the result, since the skip position in BlockSkipReader
+ // differs slightly from the one in MultiLevelSkipListReader
final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) {
@@ -517,6 +519,8 @@ final class BlockPostingsReader extends PostingsReaderBase {
accum = skipper.getDoc(); // actually, this is just lastSkipEntry
docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
}
+ // the next time advance is called, this is used to
+ // determine whether the skipper is needed.
nextSkipDoc = skipper.getNextSkipDoc();
}
if (docUpto == docFreq) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java
index 183fd5e7ea6..70ca7ef9966 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java
@@ -52,6 +52,10 @@ import org.apache.lucene.util.packed.PackedInts;
*/
final class BlockPostingsWriter extends PostingsWriterBase {
+ /**
+ * Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
static final int maxSkipLevels = 10;
final static String TERMS_CODEC = "BlockPostingsWriterTerms";