mirror of
https://github.com/apache/lucene.git
synced 2025-02-07 10:38:40 +00:00
LUCENE-10272: cross-check norms with postings in checkindex (#493)
Previously, CheckIndex iterated over the norms and validated each one it found, but nothing failed if norms that should have been present were missing. It now computes the expected number of norms from the postings and verifies that it saw exactly that many.
This commit is contained in:
parent
51e023bf7a
commit
c89c78cee0
@ -2115,6 +2115,8 @@ public final class CheckIndex implements Closeable {
|
||||
|
||||
if (fieldInfo.hasNorms() && isVectors == false) {
|
||||
final NumericDocValues norms = normsProducer.getNorms(fieldInfo);
|
||||
// count of non-zero norm values found for the field
|
||||
int actualCount = 0;
|
||||
// Cross-check terms with norms
|
||||
for (int doc = norms.nextDoc();
|
||||
doc != DocIdSetIterator.NO_MORE_DOCS;
|
||||
@ -2126,12 +2128,15 @@ public final class CheckIndex implements Closeable {
|
||||
continue;
|
||||
}
|
||||
final long norm = norms.longValue();
|
||||
if (norm != 0 && visitedDocs.get(doc) == false) {
|
||||
throw new CheckIndexException(
|
||||
"Document "
|
||||
+ doc
|
||||
+ " doesn't have terms according to postings but has a norm value that is not zero: "
|
||||
+ Long.toUnsignedString(norm));
|
||||
if (norm != 0) {
|
||||
actualCount++;
|
||||
if (visitedDocs.get(doc) == false) {
|
||||
throw new CheckIndexException(
|
||||
"Document "
|
||||
+ doc
|
||||
+ " doesn't have terms according to postings but has a norm value that is not zero: "
|
||||
+ Long.toUnsignedString(norm));
|
||||
}
|
||||
} else if (norm == 0 && visitedDocs.get(doc)) {
|
||||
throw new CheckIndexException(
|
||||
"Document "
|
||||
@ -2139,6 +2144,25 @@ public final class CheckIndex implements Closeable {
|
||||
+ " has terms according to postings but its norm value is 0, which may only be used on documents that have no terms");
|
||||
}
|
||||
}
|
||||
int expectedCount = 0;
|
||||
for (int doc = visitedDocs.nextSetBit(0);
|
||||
doc != DocIdSetIterator.NO_MORE_DOCS;
|
||||
doc =
|
||||
doc + 1 >= visitedDocs.length()
|
||||
? DocIdSetIterator.NO_MORE_DOCS
|
||||
: visitedDocs.nextSetBit(doc + 1)) {
|
||||
if (liveDocs != null && liveDocs.get(doc) == false) {
|
||||
// Norms may only be out of sync with terms on deleted documents.
|
||||
// This happens when a document fails indexing and in that case it
|
||||
// should be immediately marked as deleted by the IndexWriter.
|
||||
continue;
|
||||
}
|
||||
expectedCount++;
|
||||
}
|
||||
if (expectedCount != actualCount) {
|
||||
throw new CheckIndexException(
|
||||
"actual norm count: " + actualCount + " but expected: " + expectedCount);
|
||||
}
|
||||
}
|
||||
|
||||
// Test seek to last term:
|
||||
|
Loading…
x
Reference in New Issue
Block a user