CheckIndex - Adding a `-level` parameter to give ability to control index check detail programmatically (#12797)

* CheckIndex - Making -fast the default behaviour

1. Making -fast the new default.
2. The previous -slow is moved to -slower
3. The previous default behavior (checksum + segment file content) is activated by -slow.

* gradlew tidy

* Add changes.txt

* Moved change to Lucene 10.0, now using -detailLevel param

* Fix failing test

* Add MIGRATE.md note and comment to remove deprecated params

* Fix failing unit test

* Changing detailLevel -> level

* catch invalid API calls

* Update lucene/core/src/java/org/apache/lucene/index/CheckIndex.java

Co-authored-by: Adrien Grand <jpountz@gmail.com>

---------

Co-authored-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
Jakub Slowinski 2023-11-28 17:19:31 +00:00 committed by GitHub
parent 9574cbd1f1
commit 203f506130
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 163 additions and 118 deletions

View File

@ -65,6 +65,8 @@ API Changes
IndexSearcher#search(Query, CollectorManager) for TopFieldCollectorManager IndexSearcher#search(Query, CollectorManager) for TopFieldCollectorManager
and TopScoreDocCollectorManager. (Zach Chen, Adrien Grand, Michael McCandless, Greg Miller, Luca Cavanna) and TopScoreDocCollectorManager. (Zach Chen, Adrien Grand, Michael McCandless, Greg Miller, Luca Cavanna)
* GITHUB#11023: Adding -level param to CheckIndex, making the old -fast param the default behaviour. (Jakub Slowinski)
New Features New Features
--------------------- ---------------------

View File

@ -101,6 +101,13 @@ The deprecated getter for the `Executor` that was optionally provided to the `In
has been removed. Users that want to execute concurrent tasks should rely instead on the `TaskExecutor` has been removed. Users that want to execute concurrent tasks should rely instead on the `TaskExecutor`
that the searcher holds, retrieved via `IndexSearcher#getTaskExecutor`. that the searcher holds, retrieved via `IndexSearcher#getTaskExecutor`.
### CheckIndex params -slow and -fast are deprecated, replaced by -level X (GITHUB#11023)
The former `-fast` behaviour of `CheckIndex`, which performs checksum checks only, is now the default.
A new parameter, `-level X`, sets the detail level of the index check. The higher the value, the more checks are performed.
Valid `-level` values: `1` (default) - checksum checks only; `2` - all level 1 checks plus logical integrity checks; `3` - all
level 2 checks plus slow checks.
## Migration from Lucene 9.0 to Lucene 9.1 ## Migration from Lucene 9.0 to Lucene 9.1
### Test framework package migration and module (LUCENE-10301) ### Test framework package migration and module (LUCENE-10301)

View File

@ -22,6 +22,7 @@ import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -70,7 +71,7 @@ public class TestManyPointsInOldIndex extends LuceneTestCase {
dir.setCheckIndexOnClose(false); dir.setCheckIndexOnClose(false);
// ... because we check ourselves here: // ... because we check ourselves here:
TestUtil.checkIndex(dir, false, true, true, null); TestUtil.checkIndex(dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, null);
dir.close(); dir.close();
} }
} }

View File

@ -442,19 +442,20 @@ public final class CheckIndex implements Closeable {
IOUtils.close(writeLock); IOUtils.close(writeLock);
} }
private boolean doSlowChecks; private int level;
/** /**
* If true, additional slow checks are performed. This will likely drastically increase time it * Sets Level, the higher the value, the more additional checks are performed. This will likely
* takes to run CheckIndex! * drastically increase time it takes to run CheckIndex! See {@link Level}
*/ */
public void setDoSlowChecks(boolean v) { public void setLevel(int v) {
doSlowChecks = v; Level.checkIfLevelInBounds(v);
level = v;
} }
/** See {@link #setDoSlowChecks}. */ /** See {@link #setLevel}. */
public boolean doSlowChecks() { public int getLevel() {
return doSlowChecks; return level;
} }
private boolean failFast; private boolean failFast;
@ -474,21 +475,6 @@ public final class CheckIndex implements Closeable {
private boolean verbose; private boolean verbose;
/** See {@link #getChecksumsOnly}. */
public boolean getChecksumsOnly() {
return checksumsOnly;
}
/**
* If true, only validate physical integrity for all files. Note that the returned nested status
* objects (e.g. storedFieldStatus) will be null.
*/
public void setChecksumsOnly(boolean v) {
checksumsOnly = v;
}
private boolean checksumsOnly;
/** Set threadCount used for parallelizing index integrity checking. */ /** Set threadCount used for parallelizing index integrity checking. */
public void setThreadCount(int tc) { public void setThreadCount(int tc) {
if (tc <= 0) { if (tc <= 0) {
@ -1065,8 +1051,7 @@ public final class CheckIndex implements Closeable {
+ (info.info.maxDoc() - reader.numDocs())); + (info.info.maxDoc() - reader.numDocs()));
} }
} }
if (level >= Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS) {
if (checksumsOnly == false) {
// Test Livedocs // Test Livedocs
segInfoStat.liveDocStatus = testLiveDocs(reader, infoStream, failFast); segInfoStat.liveDocStatus = testLiveDocs(reader, infoStream, failFast);
@ -1077,15 +1062,14 @@ public final class CheckIndex implements Closeable {
segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast); segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
// Test the Term Index // Test the Term Index
segInfoStat.termIndexStatus = segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, level, failFast);
testPostings(reader, infoStream, verbose, doSlowChecks, failFast);
// Test Stored Fields // Test Stored Fields
segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast); segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
// Test Term Vectors // Test Term Vectors
segInfoStat.termVectorStatus = segInfoStat.termVectorStatus =
testTermVectors(reader, infoStream, verbose, doSlowChecks, failFast); testTermVectors(reader, infoStream, verbose, level, failFast);
// Test Docvalues // Test Docvalues
segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast); segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
@ -1416,7 +1400,7 @@ public final class CheckIndex implements Closeable {
boolean isVectors, boolean isVectors,
PrintStream infoStream, PrintStream infoStream,
boolean verbose, boolean verbose,
boolean doSlowChecks) int level)
throws IOException { throws IOException {
// TODO: we should probably return our own stats thing...?! // TODO: we should probably return our own stats thing...?!
long startNS; long startNS;
@ -1999,14 +1983,13 @@ public final class CheckIndex implements Closeable {
} }
// Checking score blocks is heavy, we only do it on long postings lists, on every 1024th // Checking score blocks is heavy, we only do it on long postings lists, on every 1024th
// term // term or if slow checks are enabled.
// or if slow checks are enabled. if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS
if (doSlowChecks
|| docFreq > 1024 || docFreq > 1024
|| (status.termCount + status.delTermCount) % 1024 == 0) { || (status.termCount + status.delTermCount) % 1024 == 0) {
// First check max scores and block uptos // First check max scores and block uptos
// But only if slow checks are enabled since we visit all docs // But only if slow checks are enabled since we visit all docs
if (doSlowChecks) { if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
int max = -1; int max = -1;
int maxFreq = 0; int maxFreq = 0;
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS); ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
@ -2073,9 +2056,9 @@ public final class CheckIndex implements Closeable {
Impacts impacts = impactsEnum.getImpacts(); Impacts impacts = impactsEnum.getImpacts();
checkImpacts(impacts, doc); checkImpacts(impacts, doc);
maxFreq = Integer.MAX_VALUE; maxFreq = Integer.MAX_VALUE;
for (int level = 0; level < impacts.numLevels(); ++level) { for (int impactsLevel = 0; impactsLevel < impacts.numLevels(); ++impactsLevel) {
if (impacts.getDocIdUpTo(level) >= max) { if (impacts.getDocIdUpTo(impactsLevel) >= max) {
List<Impact> perLevelImpacts = impacts.getImpacts(level); List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq; maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
break; break;
} }
@ -2115,9 +2098,9 @@ public final class CheckIndex implements Closeable {
Impacts impacts = impactsEnum.getImpacts(); Impacts impacts = impactsEnum.getImpacts();
checkImpacts(impacts, doc); checkImpacts(impacts, doc);
maxFreq = Integer.MAX_VALUE; maxFreq = Integer.MAX_VALUE;
for (int level = 0; level < impacts.numLevels(); ++level) { for (int impactsLevel = 0; impactsLevel < impacts.numLevels(); ++impactsLevel) {
if (impacts.getDocIdUpTo(level) >= max) { if (impacts.getDocIdUpTo(impactsLevel) >= max) {
List<Impact> perLevelImpacts = impacts.getImpacts(level); List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq; maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
break; break;
} }
@ -2382,7 +2365,7 @@ public final class CheckIndex implements Closeable {
static void checkImpacts(Impacts impacts, int lastTarget) { static void checkImpacts(Impacts impacts, int lastTarget) {
final int numLevels = impacts.numLevels(); final int numLevels = impacts.numLevels();
if (numLevels < 1) { if (numLevels < 1) {
throw new CheckIndexException("The number of levels must be >= 1, got " + numLevels); throw new CheckIndexException("The number of impact levels must be >= 1, got " + numLevels);
} }
int docIdUpTo0 = impacts.getDocIdUpTo(0); int docIdUpTo0 = impacts.getDocIdUpTo(0);
@ -2394,17 +2377,17 @@ public final class CheckIndex implements Closeable {
+ lastTarget); + lastTarget);
} }
for (int level = 1; level < numLevels; ++level) { for (int impactsLevel = 1; impactsLevel < numLevels; ++impactsLevel) {
int docIdUpTo = impacts.getDocIdUpTo(level); int docIdUpTo = impacts.getDocIdUpTo(impactsLevel);
int previousDocIdUpTo = impacts.getDocIdUpTo(level - 1); int previousDocIdUpTo = impacts.getDocIdUpTo(impactsLevel - 1);
if (docIdUpTo < previousDocIdUpTo) { if (docIdUpTo < previousDocIdUpTo) {
throw new CheckIndexException( throw new CheckIndexException(
"Decreasing return for getDocIdUpTo: level " "Decreasing return for getDocIdUpTo: level "
+ (level - 1) + (impactsLevel - 1)
+ " returned " + " returned "
+ previousDocIdUpTo + previousDocIdUpTo
+ " but level " + " but level "
+ level + impactsLevel
+ " returned " + " returned "
+ docIdUpTo + docIdUpTo
+ " for target " + " for target "
@ -2412,10 +2395,10 @@ public final class CheckIndex implements Closeable {
} }
} }
for (int level = 0; level < numLevels; ++level) { for (int impactsLevel = 0; impactsLevel < numLevels; ++impactsLevel) {
List<Impact> perLevelImpacts = impacts.getImpacts(level); List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
if (perLevelImpacts.isEmpty()) { if (perLevelImpacts.isEmpty()) {
throw new CheckIndexException("Got empty list of impacts on level " + level); throw new CheckIndexException("Got empty list of impacts on level " + impactsLevel);
} }
Impact first = perLevelImpacts.get(0); Impact first = perLevelImpacts.get(0);
if (first.freq < 1) { if (first.freq < 1) {
@ -2433,9 +2416,9 @@ public final class CheckIndex implements Closeable {
"Impacts are not ordered or contain dups, got " + previous + " then " + impact); "Impacts are not ordered or contain dups, got " + previous + " then " + impact);
} }
} }
if (level > 0) { if (impactsLevel > 0) {
// Make sure that impacts at level N trigger better scores than an level N-1 // Make sure that impacts at level N trigger better scores than at impacts level N-1
Iterator<Impact> previousIt = impacts.getImpacts(level - 1).iterator(); Iterator<Impact> previousIt = impacts.getImpacts(impactsLevel - 1).iterator();
previous = previousIt.next(); previous = previousIt.next();
Iterator<Impact> it = perLevelImpacts.iterator(); Iterator<Impact> it = perLevelImpacts.iterator();
Impact impact = it.next(); Impact impact = it.next();
@ -2451,9 +2434,9 @@ public final class CheckIndex implements Closeable {
"Found impact " "Found impact "
+ previous + previous
+ " on level " + " on level "
+ (level - 1) + (impactsLevel - 1)
+ " but no impact on level " + " but no impact on level "
+ level + impactsLevel
+ " triggers a better score: " + " triggers a better score: "
+ perLevelImpacts); + perLevelImpacts);
} }
@ -2470,7 +2453,7 @@ public final class CheckIndex implements Closeable {
*/ */
public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream) public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream)
throws IOException { throws IOException {
return testPostings(reader, infoStream, false, true, false); return testPostings(reader, infoStream, false, Level.MIN_LEVEL_FOR_SLOW_CHECKS, false);
} }
/** /**
@ -2479,15 +2462,11 @@ public final class CheckIndex implements Closeable {
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.TermIndexStatus testPostings( public static Status.TermIndexStatus testPostings(
CodecReader reader, CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
PrintStream infoStream,
boolean verbose,
boolean doSlowChecks,
boolean failFast)
throws IOException { throws IOException {
// TODO: we should go and verify term vectors match, if // TODO: we should go and verify term vectors match, if the Level is high enough to
// doSlowChecks is on... // include slow checks
Status.TermIndexStatus status; Status.TermIndexStatus status;
final int maxDoc = reader.maxDoc(); final int maxDoc = reader.maxDoc();
@ -2518,7 +2497,7 @@ public final class CheckIndex implements Closeable {
false, false,
infoStream, infoStream,
verbose, verbose,
doSlowChecks); level);
} catch (Throwable e) { } catch (Throwable e) {
if (failFast) { if (failFast) {
throw IOUtils.rethrowAlways(e); throw IOUtils.rethrowAlways(e);
@ -3661,7 +3640,7 @@ public final class CheckIndex implements Closeable {
*/ */
public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream) public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream)
throws IOException { throws IOException {
return testTermVectors(reader, infoStream, false, false, false); return testTermVectors(reader, infoStream, false, Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, false);
} }
/** /**
@ -3670,11 +3649,7 @@ public final class CheckIndex implements Closeable {
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.TermVectorStatus testTermVectors( public static Status.TermVectorStatus testTermVectors(
CodecReader reader, CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
PrintStream infoStream,
boolean verbose,
boolean doSlowChecks,
boolean failFast)
throws IOException { throws IOException {
long startNS = System.nanoTime(); long startNS = System.nanoTime();
final Status.TermVectorStatus status = new Status.TermVectorStatus(); final Status.TermVectorStatus status = new Status.TermVectorStatus();
@ -3687,14 +3662,14 @@ public final class CheckIndex implements Closeable {
PostingsEnum postings = null; PostingsEnum postings = null;
// Only used if doSlowChecks is true: // Only used if the Level is high enough to include slow checks:
PostingsEnum postingsDocs = null; PostingsEnum postingsDocs = null;
final Bits liveDocs = reader.getLiveDocs(); final Bits liveDocs = reader.getLiveDocs();
FieldsProducer postingsFields; FieldsProducer postingsFields;
// TODO: testTermsIndex // TODO: testTermsIndex
if (doSlowChecks) { if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
postingsFields = reader.getPostingsReader(); postingsFields = reader.getPostingsReader();
if (postingsFields != null) { if (postingsFields != null) {
postingsFields = postingsFields.getMergeInstance(); postingsFields = postingsFields.getMergeInstance();
@ -3718,8 +3693,7 @@ public final class CheckIndex implements Closeable {
if (tfv != null) { if (tfv != null) {
// First run with no deletions: // First run with no deletions:
checkFields( checkFields(tfv, null, 1, fieldInfos, null, false, true, infoStream, verbose, level);
tfv, null, 1, fieldInfos, null, false, true, infoStream, verbose, doSlowChecks);
// Only agg stats if the doc is live: // Only agg stats if the doc is live:
final boolean doStats = liveDocs == null || liveDocs.get(j); final boolean doStats = liveDocs == null || liveDocs.get(j);
@ -3744,7 +3718,7 @@ public final class CheckIndex implements Closeable {
+ " but FieldInfo has storeTermVector=false"); + " but FieldInfo has storeTermVector=false");
} }
if (doSlowChecks) { if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
Terms terms = tfv.terms(field); Terms terms = tfv.terms(field);
TermsEnum termsEnum = terms.iterator(); TermsEnum termsEnum = terms.iterator();
final boolean postingsHasFreq = final boolean postingsHasFreq =
@ -4047,9 +4021,8 @@ public final class CheckIndex implements Closeable {
/** Run-time configuration options for CheckIndex commands. */ /** Run-time configuration options for CheckIndex commands. */
public static class Options { public static class Options {
boolean doExorcise = false; boolean doExorcise = false;
boolean doSlowChecks = false;
boolean verbose = false; boolean verbose = false;
boolean doChecksumsOnly = false; int level = Level.DEFAULT_VALUE;
int threadCount; int threadCount;
List<String> onlySegments = new ArrayList<>(); List<String> onlySegments = new ArrayList<>();
String indexPath = null; String indexPath = null;
@ -4113,6 +4086,42 @@ public final class CheckIndex implements Closeable {
} }
} }
/**
 * Non-instantiable holder for the constants that describe CheckIndex's {@code -level} parameter,
 * plus a validator for candidate level values.
 */
public static class Level {
  private Level() {}

  /** Minimum valid level. */
  public static final int MIN_VALUE = 1;

  /** Maximum valid level. */
  public static final int MAX_VALUE = 3;

  /** The default level if none is specified. */
  public static final int DEFAULT_VALUE = MIN_VALUE;

  /** Minimum level required to run checksum checks. */
  public static final int MIN_LEVEL_FOR_CHECKSUM_CHECKS = 1;

  /** Minimum level required to run integrity checks. */
  public static final int MIN_LEVEL_FOR_INTEGRITY_CHECKS = 2;

  /** Minimum level required to run slow checks. */
  public static final int MIN_LEVEL_FOR_SLOW_CHECKS = 3;

  /**
   * Validates a candidate level.
   *
   * @param levelVal the level to validate
   * @throws IllegalArgumentException if {@code levelVal} is smaller than {@link #MIN_VALUE} or
   *     larger than {@link #MAX_VALUE}
   */
  public static void checkIfLevelInBounds(int levelVal) throws IllegalArgumentException {
    final boolean inBounds = MIN_VALUE <= levelVal && levelVal <= MAX_VALUE;
    if (inBounds) {
      return;
    }
    // Locale.ROOT keeps the digits in the message independent of the JVM's default locale.
    final String message =
        String.format(
            Locale.ROOT,
            "ERROR: given value: '%d' for -level option is out of bounds. Please use a value from '%d'->'%d'",
            levelVal,
            MIN_VALUE,
            MAX_VALUE);
    throw new IllegalArgumentException(message);
  }
}
/** /**
* Parse command line args into fields * Parse command line args into fields
* *
@ -4127,15 +4136,29 @@ public final class CheckIndex implements Closeable {
int i = 0; int i = 0;
while (i < args.length) { while (i < args.length) {
String arg = args[i]; String arg = args[i];
if ("-fast".equals(arg)) { if ("-level".equals(arg)) {
opts.doChecksumsOnly = true; if (i == args.length - 1) {
throw new IllegalArgumentException("ERROR: missing value for -level option");
}
i++;
int level = Integer.parseInt(args[i]);
Level.checkIfLevelInBounds(level);
opts.level = level;
} else if ("-fast".equals(arg)) {
// Deprecated. Remove in Lucene 11.
System.err.println(
"-fast is deprecated, use '-level 1' for explicitly verifying file checksums only. This is also now the default "
+ "behaviour!");
} else if ("-slow".equals(arg)) {
// Deprecated. Remove in Lucene 11.
System.err.println("-slow is deprecated, use '-level 3' instead for slow checks");
opts.level = Level.MIN_LEVEL_FOR_SLOW_CHECKS;
} else if ("-exorcise".equals(arg)) { } else if ("-exorcise".equals(arg)) {
opts.doExorcise = true; opts.doExorcise = true;
} else if ("-crossCheckTermVectors".equals(arg)) { } else if ("-crossCheckTermVectors".equals(arg)) {
System.err.println("-crossCheckTermVectors is deprecated, use -slow instead"); // Deprecated. Remove in Lucene 11.
opts.doSlowChecks = true; System.err.println("-crossCheckTermVectors is deprecated, use '-level 3' instead");
} else if ("-slow".equals(arg)) { opts.level = Level.MAX_VALUE;
opts.doSlowChecks = true;
} else if (arg.equals("-verbose")) { } else if (arg.equals("-verbose")) {
opts.verbose = true; opts.verbose = true;
} else if (arg.equals("-segment")) { } else if (arg.equals("-segment")) {
@ -4172,11 +4195,13 @@ public final class CheckIndex implements Closeable {
if (opts.indexPath == null) { if (opts.indexPath == null) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"\nERROR: index path not specified" "\nERROR: index path not specified"
+ "\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-slow] [-segment X] [-segment Y] [-threadCount X] [-dir-impl X]\n" + "\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-level X] [-segment X] [-segment Y] [-threadCount X] [-dir-impl X]\n"
+ "\n" + "\n"
+ " -exorcise: actually write a new segments_N file, removing any problematic segments\n" + " -exorcise: actually write a new segments_N file, removing any problematic segments\n"
+ " -fast: just verify file checksums, omitting logical integrity checks\n" + " -level X: sets the detail level of the check. The higher the value, the more checks are done.\n"
+ " -slow: do additional slow checks; THIS IS VERY SLOW!\n" + " 1 - (Default) Checksum checks only.\n"
+ " 2 - All level 1 checks + logical integrity checks.\n"
+ " 3 - All level 2 checks + slow checks.\n"
+ " -codec X: when exorcising, codec to write the new segments_N file with\n" + " -codec X: when exorcising, codec to write the new segments_N file with\n"
+ " -verbose: print additional details\n" + " -verbose: print additional details\n"
+ " -segment X: only check the specified segments. This can be specified multiple\n" + " -segment X: only check the specified segments. This can be specified multiple\n"
@ -4191,7 +4216,8 @@ public final class CheckIndex implements Closeable {
+ "If no package is specified the " + "If no package is specified the "
+ FSDirectory.class.getPackage().getName() + FSDirectory.class.getPackage().getName()
+ " package will be used.\n" + " package will be used.\n"
+ "\n" + "CheckIndex only verifies file checksums as default.\n"
+ "Use -level with value of '2' or higher if you also want to check segment file contents.\n\n"
+ "**WARNING**: -exorcise *LOSES DATA*. This should only be used on an emergency basis as it will cause\n" + "**WARNING**: -exorcise *LOSES DATA*. This should only be used on an emergency basis as it will cause\n"
+ "documents (perhaps many) to be permanently removed from the index. Always make\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n"
+ "a backup copy of your index before running this! Do not run this tool on an index\n" + "a backup copy of your index before running this! Do not run this tool on an index\n"
@ -4213,10 +4239,6 @@ public final class CheckIndex implements Closeable {
throw new IllegalArgumentException("ERROR: cannot specify both -exorcise and -segment"); throw new IllegalArgumentException("ERROR: cannot specify both -exorcise and -segment");
} }
if (opts.doChecksumsOnly && opts.doSlowChecks) {
throw new IllegalArgumentException("ERROR: cannot specify both -fast and -slow");
}
return opts; return opts;
} }
@ -4227,8 +4249,7 @@ public final class CheckIndex implements Closeable {
* @return 0 iff the index is clean, 1 otherwise * @return 0 iff the index is clean, 1 otherwise
*/ */
public int doCheck(Options opts) throws IOException, InterruptedException { public int doCheck(Options opts) throws IOException, InterruptedException {
setDoSlowChecks(opts.doSlowChecks); setLevel(opts.level);
setChecksumsOnly(opts.doChecksumsOnly);
setInfoStream(opts.out, opts.verbose); setInfoStream(opts.out, opts.verbose);
// user provided thread count via command line argument, overriding the default with user // user provided thread count via command line argument, overriding the default with user
// provided value // provided value

View File

@ -166,7 +166,11 @@ public class TestAllFilesDetectTruncation extends LuceneTestCase {
expectThrows(Exception.class, () -> DirectoryReader.open(dirCopy).close()); expectThrows(Exception.class, () -> DirectoryReader.open(dirCopy).close());
// CheckIndex should also fail: // CheckIndex should also fail:
expectThrows(Exception.class, () -> TestUtil.checkIndex(dirCopy, true, true, true, null)); expectThrows(
Exception.class,
() ->
TestUtil.checkIndex(
dirCopy, CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS, true, true, null));
} }
} }
} }

View File

@ -139,7 +139,9 @@ public class TestCheckIndex extends BaseTestCheckIndex {
} }
ByteArrayOutputStream output = new ByteArrayOutputStream(); ByteArrayOutputStream output = new ByteArrayOutputStream();
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output); CheckIndex.Status status =
TestUtil.checkIndex(
dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, output);
assertEquals(1, status.segmentInfos.size()); assertEquals(1, status.segmentInfos.size());

View File

@ -2714,7 +2714,7 @@ public class TestIndexWriter extends LuceneTestCase {
// Make sure CheckIndex includes id output: // Make sure CheckIndex includes id output:
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(d); CheckIndex checker = new CheckIndex(d);
checker.setDoSlowChecks(false); checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false); checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false);
CheckIndex.Status indexStatus = checker.checkIndex(null); CheckIndex.Status indexStatus = checker.checkIndex(null);
String s = bos.toString(IOUtils.UTF_8); String s = bos.toString(IOUtils.UTF_8);

View File

@ -731,7 +731,9 @@ public class TestPointValues extends LuceneTestCase {
w.close(); w.close();
ByteArrayOutputStream output = new ByteArrayOutputStream(); ByteArrayOutputStream output = new ByteArrayOutputStream();
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output); CheckIndex.Status status =
TestUtil.checkIndex(
dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, output);
assertEquals(1, status.segmentInfos.size()); assertEquals(1, status.segmentInfos.size());
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0); CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
// total 3 point values were index: // total 3 point values were index:

View File

@ -121,7 +121,9 @@ public class TestSwappedIndexFiles extends LuceneTestCase {
EOFException.class, EOFException.class,
IndexFormatTooOldException.class, IndexFormatTooOldException.class,
CheckIndex.CheckIndexException.class), CheckIndex.CheckIndexException.class),
() -> TestUtil.checkIndex(dirCopy, true, true, true, null)); () ->
TestUtil.checkIndex(
dirCopy, CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS, true, true, null));
} }
} }
} }

View File

@ -1270,7 +1270,9 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
} }
ByteArrayOutputStream output = new ByteArrayOutputStream(); ByteArrayOutputStream output = new ByteArrayOutputStream();
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output); CheckIndex.Status status =
TestUtil.checkIndex(
dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, output);
assertEquals(1, status.segmentInfos.size()); assertEquals(1, status.segmentInfos.size());
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0); CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
// total 3 vector values were indexed: // total 3 vector values were indexed:

View File

@ -59,6 +59,7 @@ public class BaseTestCheckIndex extends LuceneTestCase {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir); CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8)); checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS);
if (VERBOSE) checker.setInfoStream(System.out); if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex(); CheckIndex.Status indexStatus = checker.checkIndex();
if (indexStatus.clean == false) { if (indexStatus.clean == false) {

View File

@ -17,6 +17,7 @@
package org.apache.lucene.tests.store; package org.apache.lucene.tests.store;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.FilterDirectory;
@ -29,7 +30,7 @@ import org.apache.lucene.tests.util.TestUtil;
public abstract class BaseDirectoryWrapper extends FilterDirectory { public abstract class BaseDirectoryWrapper extends FilterDirectory {
private boolean checkIndexOnClose = true; private boolean checkIndexOnClose = true;
private boolean doSlowChecksOnClose = true; private int levelForCheckOnClose = CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS;
protected volatile boolean isOpen = true; protected volatile boolean isOpen = true;
protected BaseDirectoryWrapper(Directory delegate) { protected BaseDirectoryWrapper(Directory delegate) {
@ -41,7 +42,7 @@ public abstract class BaseDirectoryWrapper extends FilterDirectory {
if (isOpen) { if (isOpen) {
isOpen = false; isOpen = false;
if (checkIndexOnClose && DirectoryReader.indexExists(this)) { if (checkIndexOnClose && DirectoryReader.indexExists(this)) {
TestUtil.checkIndex(this, doSlowChecksOnClose); TestUtil.checkIndex(this, levelForCheckOnClose);
} }
} }
super.close(); super.close();
@ -61,10 +62,15 @@ public abstract class BaseDirectoryWrapper extends FilterDirectory {
} }
public void setCrossCheckTermVectorsOnClose(boolean value) { public void setCrossCheckTermVectorsOnClose(boolean value) {
this.doSlowChecksOnClose = value; // If true, we are enabling slow checks.
if (value == true) {
this.levelForCheckOnClose = CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS;
} else {
this.levelForCheckOnClose = CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS;
}
} }
public boolean getCrossCheckTermVectorsOnClose() { public int getLevelForCheckOnClose() {
return doSlowChecksOnClose; return levelForCheckOnClose;
} }
} }

View File

@ -906,7 +906,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
// TestUtil#checkIndex checks segment concurrently using another thread, but making // TestUtil#checkIndex checks segment concurrently using another thread, but making
// call back to synchronized methods such as MockDirectoryWrapper#fileLength. // call back to synchronized methods such as MockDirectoryWrapper#fileLength.
// Hence passing concurrent = false to this method to turn off concurrent checks. // Hence passing concurrent = false to this method to turn off concurrent checks.
TestUtil.checkIndex(this, getCrossCheckTermVectorsOnClose(), true, false, null); TestUtil.checkIndex(this, getLevelForCheckOnClose(), true, false, null);
} }
// TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles // TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles

View File

@ -306,12 +306,11 @@ public final class TestUtil {
* thrown; else, true is returned. * thrown; else, true is returned.
*/ */
public static CheckIndex.Status checkIndex(Directory dir) throws IOException { public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
return checkIndex(dir, true); return checkIndex(dir, CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS);
} }
public static CheckIndex.Status checkIndex(Directory dir, boolean doSlowChecks) public static CheckIndex.Status checkIndex(Directory dir, int level) throws IOException {
throws IOException { return checkIndex(dir, level, false, true, null);
return checkIndex(dir, doSlowChecks, false, true, null);
} }
/** /**
@ -319,11 +318,7 @@ public final class TestUtil {
* moving on to other fields/segments to look for any other corruption. * moving on to other fields/segments to look for any other corruption.
*/ */
public static CheckIndex.Status checkIndex( public static CheckIndex.Status checkIndex(
Directory dir, Directory dir, int level, boolean failFast, boolean concurrent, ByteArrayOutputStream output)
boolean doSlowChecks,
boolean failFast,
boolean concurrent,
ByteArrayOutputStream output)
throws IOException { throws IOException {
if (output == null) { if (output == null) {
output = new ByteArrayOutputStream(1024); output = new ByteArrayOutputStream(1024);
@ -332,7 +327,7 @@ public final class TestUtil {
// some tests e.g. exception tests become much more complicated if they have to close the writer // some tests e.g. exception tests become much more complicated if they have to close the writer
try (CheckIndex checker = try (CheckIndex checker =
new CheckIndex(dir, NoLockFactory.INSTANCE.obtainLock(dir, "bogus"))) { new CheckIndex(dir, NoLockFactory.INSTANCE.obtainLock(dir, "bogus"))) {
checker.setDoSlowChecks(doSlowChecks); checker.setLevel(level);
checker.setFailFast(failFast); checker.setFailFast(failFast);
checker.setInfoStream(new PrintStream(output, false, IOUtils.UTF_8), false); checker.setInfoStream(new PrintStream(output, false, IOUtils.UTF_8), false);
if (concurrent) { if (concurrent) {
@ -361,11 +356,11 @@ public final class TestUtil {
*/ */
public static void checkReader(IndexReader reader) throws IOException { public static void checkReader(IndexReader reader) throws IOException {
for (LeafReaderContext context : reader.leaves()) { for (LeafReaderContext context : reader.leaves()) {
checkReader(context.reader(), true); checkReader(context.reader(), CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS);
} }
} }
public static void checkReader(LeafReader reader, boolean doSlowChecks) throws IOException { public static void checkReader(LeafReader reader, int level) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8); PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8);
@ -379,9 +374,9 @@ public final class TestUtil {
CheckIndex.testLiveDocs(codecReader, infoStream, true); CheckIndex.testLiveDocs(codecReader, infoStream, true);
CheckIndex.testFieldInfos(codecReader, infoStream, true); CheckIndex.testFieldInfos(codecReader, infoStream, true);
CheckIndex.testFieldNorms(codecReader, infoStream, true); CheckIndex.testFieldNorms(codecReader, infoStream, true);
CheckIndex.testPostings(codecReader, infoStream, false, doSlowChecks, true); CheckIndex.testPostings(codecReader, infoStream, false, level, true);
CheckIndex.testStoredFields(codecReader, infoStream, true); CheckIndex.testStoredFields(codecReader, infoStream, true);
CheckIndex.testTermVectors(codecReader, infoStream, false, doSlowChecks, true); CheckIndex.testTermVectors(codecReader, infoStream, false, level, true);
CheckIndex.testDocValues(codecReader, infoStream, true); CheckIndex.testDocValues(codecReader, infoStream, true);
CheckIndex.testPoints(codecReader, infoStream, true); CheckIndex.testPoints(codecReader, infoStream, true);