LUCENE-9827: Propagate `numChunks` through bulk merges for term vectors as well.

This commit also adds more checks about the values of `numChunks`,
`numDirtyChunks` and `numDirtyDocs` that would have helped discover this issue
earlier.
This commit is contained in:
Adrien Grand 2021-04-12 09:44:35 +02:00
parent 9d15435b15
commit a7b0aadcfc
3 changed files with 51 additions and 0 deletions

View File

@@ -178,6 +178,31 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsRea
numDirtyChunks = metaIn.readVLong();
numDirtyDocs = metaIn.readVLong();
if (numChunks < numDirtyChunks) {
throw new CorruptIndexException(
"Cannot have more dirty chunks than chunks: numChunks="
+ numChunks
+ ", numDirtyChunks="
+ numDirtyChunks,
metaIn);
}
if ((numDirtyChunks == 0) != (numDirtyDocs == 0)) {
throw new CorruptIndexException(
"Cannot have dirty chunks without dirty docs or vice-versa: numDirtyChunks="
+ numDirtyChunks
+ ", numDirtyDocs="
+ numDirtyDocs,
metaIn);
}
if (numDirtyDocs < numDirtyChunks) {
throw new CorruptIndexException(
"Cannot have more dirty chunks than documents within dirty chunks: numDirtyChunks="
+ numDirtyChunks
+ ", numDirtyDocs="
+ numDirtyDocs,
metaIn);
}
CodecUtil.checkFooter(metaIn, null);
metaIn.close();

View File

@@ -173,6 +173,31 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
numDirtyChunks = metaIn.readVLong();
numDirtyDocs = metaIn.readVLong();
if (numChunks < numDirtyChunks) {
throw new CorruptIndexException(
"Cannot have more dirty chunks than chunks: numChunks="
+ numChunks
+ ", numDirtyChunks="
+ numDirtyChunks,
metaIn);
}
if ((numDirtyChunks == 0) != (numDirtyDocs == 0)) {
throw new CorruptIndexException(
"Cannot have dirty chunks without dirty docs or vice-versa: numDirtyChunks="
+ numDirtyChunks
+ ", numDirtyDocs="
+ numDirtyDocs,
metaIn);
}
if (numDirtyDocs < numDirtyChunks) {
throw new CorruptIndexException(
"Cannot have more dirty chunks than documents within dirty chunks: numDirtyChunks="
+ numDirtyChunks
+ ", numDirtyDocs="
+ numDirtyDocs,
metaIn);
}
decompressor = compressionMode.newDecompressor();
this.reader =
new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);

View File

@@ -904,6 +904,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
}
// since we bulk merged all chunks, we inherit any dirty ones from this segment.
numChunks += matchingVectorsReader.getNumChunks();
numDirtyChunks += matchingVectorsReader.getNumDirtyChunks();
numDirtyDocs += matchingVectorsReader.getNumDirtyDocs();
} else {