From 107926e486f8cd6bbfc8abb055c9f58fe56f9cbb Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 21 Feb 2021 11:19:41 -0500 Subject: [PATCH] LUCENE-9795: fix CheckIndex not to validate SortedDocValues as if they were BinaryDocValues CheckIndex already validates SortedDocValues properly: it reads every document's ordinal and validates dereferencing all the ordinals back to bytes from the terms dictionary. It should not do an additional (very slow) pass where it treats the field as if it were binary (doc -> ord -> byte[]); this is slow and doesn't validate any additional index data. Now that the terms dictionary of SortedDocValues may be compressed, it is especially slow to misuse the doc values field in this way. --- lucene/core/src/java/org/apache/lucene/index/CheckIndex.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 393135d47fd..cb946ea8492 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -3385,7 +3385,6 @@ public final class CheckIndex implements Closeable { case SORTED: status.totalSortedFields++; checkDVIterator(fi, maxDoc, dvReader::getSorted); - checkBinaryDocValues(fi.name, maxDoc, dvReader.getSorted(fi), dvReader.getSorted(fi)); checkSortedDocValues(fi.name, maxDoc, dvReader.getSorted(fi), dvReader.getSorted(fi)); break; case SORTED_NUMERIC: