mirror of https://github.com/apache/lucene.git
LUCENE-9827: Propagate `numChunks` through bulk merges for term vectors as well.
This commit also adds more checks on the values of `numChunks`, `numDirtyChunks` and `numDirtyDocs` that would have helped discover this issue earlier.
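For context, the new reader-side checks encode three invariants among the chunk statistics: a segment cannot have more dirty chunks than total chunks, dirty chunks and dirty docs are either both zero or both non-zero, and every dirty chunk must contain at least one dirty doc. A minimal self-contained sketch (hypothetical class, not Lucene code) shows how the term-vectors bug trips the first invariant: before this fix, bulk merges accumulated numDirtyChunks but never numChunks, so a merged segment could end up reporting numChunks < numDirtyChunks.

// Hypothetical illustration of the invariants enforced by the new checks;
// the field names mirror the diff below, but this is not Lucene code.
public class ChunkStatsInvariants {
  static void check(long numChunks, long numDirtyChunks, long numDirtyDocs) {
    if (numChunks < numDirtyChunks) {
      throw new IllegalStateException("more dirty chunks than chunks");
    }
    if ((numDirtyChunks == 0) != (numDirtyDocs == 0)) {
      throw new IllegalStateException("dirty chunks and dirty docs must be zero together");
    }
    if (numDirtyDocs < numDirtyChunks) {
      throw new IllegalStateException("every dirty chunk holds at least one dirty doc");
    }
  }

  public static void main(String[] args) {
    check(10, 2, 5); // ok: 2 dirty chunks holding 5 dirty docs out of 10 chunks
    check(0, 2, 5);  // throws: numChunks was never propagated during a bulk merge
  }
}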
commit a7b0aadcfc (parent 9d15435b15)
@@ -178,6 +178,31 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsReader
       numDirtyChunks = metaIn.readVLong();
       numDirtyDocs = metaIn.readVLong();
+
+      if (numChunks < numDirtyChunks) {
+        throw new CorruptIndexException(
+            "Cannot have more dirty chunks than chunks: numChunks="
+                + numChunks
+                + ", numDirtyChunks="
+                + numDirtyChunks,
+            metaIn);
+      }
+      if ((numDirtyChunks == 0) != (numDirtyDocs == 0)) {
+        throw new CorruptIndexException(
+            "Cannot have dirty chunks without dirty docs or vice-versa: numDirtyChunks="
+                + numDirtyChunks
+                + ", numDirtyDocs="
+                + numDirtyDocs,
+            metaIn);
+      }
+      if (numDirtyDocs < numDirtyChunks) {
+        throw new CorruptIndexException(
+            "Cannot have more dirty chunks than documents within dirty chunks: numDirtyChunks="
+                + numDirtyChunks
+                + ", numDirtyDocs="
+                + numDirtyDocs,
+            metaIn);
+      }

       CodecUtil.checkFooter(metaIn, null);
       metaIn.close();
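These checks run while the metadata stream is read, right before CodecUtil.checkFooter, so a segment with inconsistent counters is rejected when the reader is opened rather than misbehaving later during searches or merges. Below is a sketch of that open-time validation pattern, with a hypothetical file name and simplified structure, assuming the Lucene 9.x Directory and CodecUtil APIs; the identical checks are added to the term vectors reader in the next hunk.

// Sketch only: reads hypothetical per-segment chunk statistics and validates
// them before verifying the footer checksum, mirroring the pattern above.
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

final class MetaValidationSketch {
  static long[] readChunkStats(Directory dir, String metaFile) throws IOException {
    try (ChecksumIndexInput metaIn = dir.openChecksumInput(metaFile, IOContext.READONCE)) {
      long numChunks = metaIn.readVLong();
      long numDirtyChunks = metaIn.readVLong();
      long numDirtyDocs = metaIn.readVLong();
      if (numChunks < numDirtyChunks) {
        throw new CorruptIndexException(
            "numChunks=" + numChunks + " < numDirtyChunks=" + numDirtyChunks, metaIn);
      }
      // Passing null means there is no prior exception to attach; checkFooter
      // then validates the checksum and throws CorruptIndexException on mismatch.
      CodecUtil.checkFooter(metaIn, null);
      return new long[] {numChunks, numDirtyChunks, numDirtyDocs};
    }
  }
}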
@@ -173,6 +173,31 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
       numDirtyChunks = metaIn.readVLong();
       numDirtyDocs = metaIn.readVLong();
+
+      if (numChunks < numDirtyChunks) {
+        throw new CorruptIndexException(
+            "Cannot have more dirty chunks than chunks: numChunks="
+                + numChunks
+                + ", numDirtyChunks="
+                + numDirtyChunks,
+            metaIn);
+      }
+      if ((numDirtyChunks == 0) != (numDirtyDocs == 0)) {
+        throw new CorruptIndexException(
+            "Cannot have dirty chunks without dirty docs or vice-versa: numDirtyChunks="
+                + numDirtyChunks
+                + ", numDirtyDocs="
+                + numDirtyDocs,
+            metaIn);
+      }
+      if (numDirtyDocs < numDirtyChunks) {
+        throw new CorruptIndexException(
+            "Cannot have more dirty chunks than documents within dirty chunks: numDirtyChunks="
+                + numDirtyChunks
+                + ", numDirtyDocs="
+                + numDirtyDocs,
+            metaIn);
+      }

       decompressor = compressionMode.newDecompressor();
       this.reader =
           new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
@@ -904,6 +904,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWriter
         }

         // since we bulk merged all chunks, we inherit any dirty ones from this segment.
+        numChunks += matchingVectorsReader.getNumChunks();
         numDirtyChunks += matchingVectorsReader.getNumDirtyChunks();
         numDirtyDocs += matchingVectorsReader.getNumDirtyDocs();
       } else {
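Beyond satisfying the new sanity checks, numChunks matters because the compressing writers compare dirty chunks against total chunks when deciding whether a candidate segment is clean enough to bulk-copy (the tooDirty(...) heuristic in the writers). A hedged sketch of a heuristic of that shape, with an invented threshold, shows how an unpropagated total skews the decision:

// Hypothetical "too dirty" heuristic; the threshold is invented for
// illustration and the real logic lives in tooDirty() in the writers.
final class DirtinessSketch {
  static boolean tooDirty(long numChunks, long numDirtyChunks) {
    // If numChunks is never accumulated across bulk merges it stays near zero,
    // so even a single dirty chunk makes the ratio look catastrophically high.
    return numDirtyChunks * 100 > numChunks; // "more than 1% of chunks are dirty"
  }
}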