diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
index 39089920c9a..89368fd2a92 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
@@ -93,6 +93,7 @@ import org.apache.lucene.util.fst.Util;
*
*
*
@@ -113,7 +114,7 @@ import org.apache.lucene.util.fst.Util;
*
*
* - TermsDict (.tim) --> Header, PostingsHeader, NodeBlockNumBlocks,
- * FieldSummary, DirOffset, Footer
+ * Footer
*
- NodeBlock --> (OuterNode | InnerNode)
*
- OuterNode --> EntryCount, SuffixLength, ByteSuffixLength, StatsLength, <
* TermStats >EntryCount, MetaLength,
@@ -122,16 +123,10 @@ import org.apache.lucene.util.fst.Util;
* < TermStats ? >EntryCount, MetaLength, <TermMetadata ?
* >EntryCount
*
- TermStats --> DocFreq, TotalTermFreq
- *
- FieldSummary --> NumFields, <FieldNumber, NumTerms, RootCodeLength,
- * ByteRootCodeLength, SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm,
- * MaxTerm>NumFields
*
- Header --> {@link CodecUtil#writeHeader CodecHeader}
- *
- DirOffset --> {@link DataOutput#writeLong Uint64}
- *
- MinTerm,MaxTerm --> {@link DataOutput#writeVInt VInt} length followed by the byte[]
- *
- EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
- * FieldNumber,RootCodeLength,DocCount,LongsSize --> {@link DataOutput#writeVInt VInt}
- *
- TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --> {@link DataOutput#writeVLong
- * VLong}
+ *
- EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength --> {@link DataOutput#writeVInt
+ * VInt}
+ *
- TotalTermFreq --> {@link DataOutput#writeVLong VLong}
*
- Footer --> {@link CodecUtil#writeFooter CodecFooter}
*
*
@@ -140,24 +135,48 @@ import org.apache.lucene.util.fst.Util;
*
* - Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information for
* the BlockTree implementation.
- *
- DirOffset is a pointer to the FieldSummary section.
*
- DocFreq is the count of documents which contain the term.
*
- TotalTermFreq is the total number of occurrences of the term. This is encoded as the
* difference between the total number of occurrences and the DocFreq.
+ *
- PostingsHeader and TermMetadata are supplied by the specific postings implementation:
+ * these contain arbitrary per-file data (such as parameters or versioning information) and
+ * per-term data (such as pointers to inverted files).
+ *
- For inner nodes of the tree, every entry will steal one bit to mark whether it points to
+ * child nodes (sub-block). If so, the corresponding TermStats and TermMetadata are omitted.
+ *
+ *
+ *
+ *
+ *
Term Metadata
+ *
+ * The .tmd file contains the list of term metadata (such as FST index metadata) and field-level
+ * statistics (such as the sum of total term frequencies).
+ *
+ *
+ * - TermsMeta (.tmd) --> Header, NumFields, <FieldStats>NumFields,
+ * TermIndexLength, TermDictLength, Footer
+ *
- FieldStats --> FieldNumber, NumTerms, RootCodeLength, ByteRootCodeLength,
+ * SumTotalTermFreq?, SumDocFreq, DocCount, MinTerm, MaxTerm, IndexStartFP, FSTHeader,
+ * FSTMetadata
+ *
- Header,FSTHeader --> {@link CodecUtil#writeHeader CodecHeader}
+ *
- TermIndexLength, TermDictLength --> {@link DataOutput#writeLong Uint64}
+ *
- MinTerm,MaxTerm --> {@link DataOutput#writeVInt VInt} length followed by the byte[]
+ *
- NumFields,FieldNumber,RootCodeLength,DocCount --> {@link DataOutput#writeVInt VInt}
+ *
- NumTerms,SumTotalTermFreq,SumDocFreq,IndexStartFP --> {@link DataOutput#writeVLong
+ * VLong}
+ *
- Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *
+ * Notes:
+ *
+ *
* - FieldNumber is the fields number from {@link FieldInfos}. (.fnm)
*
- NumTerms is the number of unique terms for the field.
*
- RootCode points to the root block for the field.
*
- SumDocFreq is the total number of postings, the number of term-document pairs across the
* entire field.
*
- DocCount is the number of documents that have at least one posting for this field.
- *
- LongsSize records how many long values the postings writer/reader record per term (e.g., to
- * hold freq/prox/doc file offsets).
*
- MinTerm, MaxTerm are the lowest and highest term in this field.
- *
- PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
- * these contain arbitrary per-file data (such as parameters or versioning information) and
- * per-term data (such as pointers to inverted files).
- *
- For inner nodes of the tree, every entry will steal one bit to mark whether it points to
- * child nodes(sub-block). If so, the corresponding TermStats and TermMetaData are omitted
*
*
*
@@ -169,11 +188,8 @@ import org.apache.lucene.util.fst.Util;
* saving a disk seek.
*
*
- * - TermsIndex (.tip) --> Header, FSTIndexNumFields
- * <IndexStartFP>NumFields, DirOffset, Footer
+ *
- TermsIndex (.tip) --> Header, <FSTIndex>NumFields, Footer
*
- Header --> {@link CodecUtil#writeHeader CodecHeader}
- *
- DirOffset --> {@link DataOutput#writeLong Uint64}
- *
- IndexStartFP --> {@link DataOutput#writeVLong VLong}
*
*
- FSTIndex --> {@link FST FST<byte[]>}
*
- Footer --> {@link CodecUtil#writeFooter CodecFooter}
@@ -185,7 +201,6 @@ import org.apache.lucene.util.fst.Util;
*
- The .tip file contains a separate FST for each field. The FST maps a term prefix to the
* on-disk block that holds all terms starting with that prefix. Each field's IndexStartFP
* points to its FST.
- *
- DirOffset is a pointer to the start of the IndexStartFPs for all fields
*
- It's possible that an on-disk block would contain too many terms (more than the allowed
* maximum (default: 48)). When this happens, the block is sub-divided into new blocks (called
* "floor blocks"), and then the output in the FST for the block's prefix encodes the leading