diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java index 4822eb7249b..0c80d702c78 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java @@ -26,6 +26,8 @@ import org.apache.lucene.util.MathUtil; /** * This abstract class writes skip lists with multiple levels. * + *
+ *
  * Example for skipInterval = 3:
  *                                                     c            (skip level 2)
  *                 c                 c                 c            (skip level 1) 
@@ -45,6 +47,7 @@ import org.apache.lucene.util.MathUtil;
  * 
  * While this class takes care of writing the different skip levels,
  * subclasses must define the actual format of the skip data.
+ * 
* @lucene.experimental */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java index afd04fff911..c3ae82d65a3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java @@ -30,10 +30,290 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.IOUtils; +// javadocs +import org.apache.lucene.codecs.MultiLevelSkipListWriter; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.packed.PackedInts; + /** - * Encodes/decode postings in packed int blocks for faster - * decode. + * Block postings format, which encodes postings in packed int blocks + * for faster decode. + * + *

+ * Basic idea: + *

+ *

+ * + *

+ * Files and detailed format: + *

+ *

+ * + * + *
+ *
+ * Term Dictionary + * + *

The .tim file format is quite similar to that of Lucene40PostingsFormat, + * with minor differences in MetadataBlock

+ * + * + *

Notes:

+ * + *
+ *
+ * + * + *
+ *
+ * Term Index + *

The .tip file format is described in + * + * Lucene40PostingsFormat:TermIndex + *

+ *
+ * + * + * + *
+ *
+ * Frequencies and Skip Data + * + *

The .doc file contains the lists of documents which contain each term, along + * with the frequency of the term in that document (except when frequencies are + * omitted: {@link IndexOptions#DOCS_ONLY}). It also saves skip data to the beginning of + * each packed or VInt block, when the length of the document list is larger than the packed block size.

+ * + * + *

Notes:

+ * + *
+ *
+ * + * + *
+ *
+ * Positions + * + *

Notes:

+ * + *
+ *
+ * + * + *
+ *
+ * Payloads and Offsets + * + *

Notes:

+ *
+ *
+ *

+ * */ + public final class BlockPostingsFormat extends PostingsFormat { public static final String DOC_EXTENSION = "doc"; public static final String POS_EXTENSION = "pos"; @@ -42,7 +322,7 @@ public final class BlockPostingsFormat extends PostingsFormat { private final int minTermBlockSize; private final int maxTermBlockSize; - // NOTE: must be factor of 64 because of PackedInts long-aligned encoding/decoding + // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding public final static int BLOCK_SIZE = 128; public BlockPostingsFormat() { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java index 3f7b919003d..c5e043cccf3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java @@ -52,7 +52,7 @@ import org.apache.lucene.util.IOUtils; * @see BlockSkipReader for details * */ -public final class BlockPostingsReader extends PostingsReaderBase { +final class BlockPostingsReader extends PostingsReaderBase { private final IndexInput docIn; private final IndexInput posIn; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java index 26f7f02816a..68bc5434474 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java @@ -159,7 +159,7 @@ import org.apache.lucene.util.fst.FST; // javadocs * with the frequency of the term in that document (except when frequencies are * omitted: {@link IndexOptions#DOCS_ONLY}).

*