mirror of https://github.com/apache/lucene.git
CheckIndex - Adding a `-level` parameter to give ability to control index check detail programmatically (#12797)
* CheckIndex - Making -fast the default behaviour 1. Making -fast the new default. 2. The previous -slow is moved to -slower 3. The previous default behavior (checksum + segment file content) is activated by -slow. * gradlew tidy * Add changes.txt * Moved change to Lucene 10.0, now using -detailLevel param * Fix failing test * Add MIGRATE.md note and comment to remove deprecated params * Fix failing unit test * Changing detailLevel -> level * catch invalid API calls * Update lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Co-authored-by: Adrien Grand <jpountz@gmail.com> --------- Co-authored-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
parent
9574cbd1f1
commit
203f506130
|
@ -65,6 +65,8 @@ API Changes
|
|||
IndexSearcher#search(Query, CollectorManager) for TopFieldCollectorManager
|
||||
and TopScoreDocCollectorManager. (Zach Chen, Adrien Grand, Michael McCandless, Greg Miller, Luca Cavanna)
|
||||
|
||||
* GITHUB#11023: Added -level param to CheckIndex; the behaviour of the old -fast param is now the default. (Jakub Slowinski)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -101,6 +101,13 @@ The deprecated getter for the `Executor` that was optionally provided to the `In
|
|||
has been removed. Users that want to execute concurrent tasks should rely instead on the `TaskExecutor`
|
||||
that the searcher holds, retrieved via `IndexSearcher#getTaskExecutor`.
|
||||
|
||||
### CheckIndex params -slow and -fast are deprecated, replaced by -level X (GITHUB#11023)
|
||||
|
||||
The former `-fast` behaviour of `CheckIndex`, which performs checksum checks only, is now the default.
|
||||
Added a new parameter: `-level X`, to set the detail level of the index check. The higher the value, the more checks are performed.
|
||||
Supported `-level` values: `1` (default) - checksum checks only, `2` - all level 1 checks as well as logical integrity checks, `3` - all
|
||||
level 2 checks as well as slow checks.
|
||||
|
||||
## Migration from Lucene 9.0 to Lucene 9.1
|
||||
|
||||
### Test framework package migration and module (LUCENE-10301)
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.nio.file.Path;
|
|||
import java.nio.file.Paths;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.index.CheckIndex;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -70,7 +71,7 @@ public class TestManyPointsInOldIndex extends LuceneTestCase {
|
|||
dir.setCheckIndexOnClose(false);
|
||||
|
||||
// ... because we check ourselves here:
|
||||
TestUtil.checkIndex(dir, false, true, true, null);
|
||||
TestUtil.checkIndex(dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, null);
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -442,19 +442,20 @@ public final class CheckIndex implements Closeable {
|
|||
IOUtils.close(writeLock);
|
||||
}
|
||||
|
||||
private boolean doSlowChecks;
|
||||
private int level;
|
||||
|
||||
/**
|
||||
* If true, additional slow checks are performed. This will likely drastically increase time it
|
||||
* takes to run CheckIndex!
|
||||
* Sets the check level: the higher the value, the more additional checks are performed. A higher level will likely
|
||||
* drastically increase time it takes to run CheckIndex! See {@link Level}
|
||||
*/
|
||||
public void setDoSlowChecks(boolean v) {
|
||||
doSlowChecks = v;
|
||||
public void setLevel(int v) {
|
||||
Level.checkIfLevelInBounds(v);
|
||||
level = v;
|
||||
}
|
||||
|
||||
/** See {@link #setDoSlowChecks}. */
|
||||
public boolean doSlowChecks() {
|
||||
return doSlowChecks;
|
||||
/** See {@link #setLevel}. */
|
||||
public int getLevel() {
|
||||
return level;
|
||||
}
|
||||
|
||||
private boolean failFast;
|
||||
|
@ -474,21 +475,6 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
private boolean verbose;
|
||||
|
||||
/** See {@link #getChecksumsOnly}. */
|
||||
public boolean getChecksumsOnly() {
|
||||
return checksumsOnly;
|
||||
}
|
||||
|
||||
/**
|
||||
* If true, only validate physical integrity for all files. Note that the returned nested status
|
||||
* objects (e.g. storedFieldStatus) will be null.
|
||||
*/
|
||||
public void setChecksumsOnly(boolean v) {
|
||||
checksumsOnly = v;
|
||||
}
|
||||
|
||||
private boolean checksumsOnly;
|
||||
|
||||
/** Set threadCount used for parallelizing index integrity checking. */
|
||||
public void setThreadCount(int tc) {
|
||||
if (tc <= 0) {
|
||||
|
@ -1065,8 +1051,7 @@ public final class CheckIndex implements Closeable {
|
|||
+ (info.info.maxDoc() - reader.numDocs()));
|
||||
}
|
||||
}
|
||||
|
||||
if (checksumsOnly == false) {
|
||||
if (level >= Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS) {
|
||||
// Test Livedocs
|
||||
segInfoStat.liveDocStatus = testLiveDocs(reader, infoStream, failFast);
|
||||
|
||||
|
@ -1077,15 +1062,14 @@ public final class CheckIndex implements Closeable {
|
|||
segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
|
||||
|
||||
// Test the Term Index
|
||||
segInfoStat.termIndexStatus =
|
||||
testPostings(reader, infoStream, verbose, doSlowChecks, failFast);
|
||||
segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, level, failFast);
|
||||
|
||||
// Test Stored Fields
|
||||
segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
|
||||
|
||||
// Test Term Vectors
|
||||
segInfoStat.termVectorStatus =
|
||||
testTermVectors(reader, infoStream, verbose, doSlowChecks, failFast);
|
||||
testTermVectors(reader, infoStream, verbose, level, failFast);
|
||||
|
||||
// Test Docvalues
|
||||
segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
|
||||
|
@ -1416,7 +1400,7 @@ public final class CheckIndex implements Closeable {
|
|||
boolean isVectors,
|
||||
PrintStream infoStream,
|
||||
boolean verbose,
|
||||
boolean doSlowChecks)
|
||||
int level)
|
||||
throws IOException {
|
||||
// TODO: we should probably return our own stats thing...?!
|
||||
long startNS;
|
||||
|
@ -1999,14 +1983,13 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
// Checking score blocks is heavy, we only do it on long postings lists, on every 1024th
|
||||
// term
|
||||
// or if slow checks are enabled.
|
||||
if (doSlowChecks
|
||||
// term or if slow checks are enabled.
|
||||
if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS
|
||||
|| docFreq > 1024
|
||||
|| (status.termCount + status.delTermCount) % 1024 == 0) {
|
||||
// First check max scores and block uptos
|
||||
// But only if slow checks are enabled since we visit all docs
|
||||
if (doSlowChecks) {
|
||||
if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
|
||||
int max = -1;
|
||||
int maxFreq = 0;
|
||||
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
|
||||
|
@ -2073,9 +2056,9 @@ public final class CheckIndex implements Closeable {
|
|||
Impacts impacts = impactsEnum.getImpacts();
|
||||
checkImpacts(impacts, doc);
|
||||
maxFreq = Integer.MAX_VALUE;
|
||||
for (int level = 0; level < impacts.numLevels(); ++level) {
|
||||
if (impacts.getDocIdUpTo(level) >= max) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(level);
|
||||
for (int impactsLevel = 0; impactsLevel < impacts.numLevels(); ++impactsLevel) {
|
||||
if (impacts.getDocIdUpTo(impactsLevel) >= max) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
|
||||
maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
|
||||
break;
|
||||
}
|
||||
|
@ -2115,9 +2098,9 @@ public final class CheckIndex implements Closeable {
|
|||
Impacts impacts = impactsEnum.getImpacts();
|
||||
checkImpacts(impacts, doc);
|
||||
maxFreq = Integer.MAX_VALUE;
|
||||
for (int level = 0; level < impacts.numLevels(); ++level) {
|
||||
if (impacts.getDocIdUpTo(level) >= max) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(level);
|
||||
for (int impactsLevel = 0; impactsLevel < impacts.numLevels(); ++impactsLevel) {
|
||||
if (impacts.getDocIdUpTo(impactsLevel) >= max) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
|
||||
maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
|
||||
break;
|
||||
}
|
||||
|
@ -2382,7 +2365,7 @@ public final class CheckIndex implements Closeable {
|
|||
static void checkImpacts(Impacts impacts, int lastTarget) {
|
||||
final int numLevels = impacts.numLevels();
|
||||
if (numLevels < 1) {
|
||||
throw new CheckIndexException("The number of levels must be >= 1, got " + numLevels);
|
||||
throw new CheckIndexException("The number of impact levels must be >= 1, got " + numLevels);
|
||||
}
|
||||
|
||||
int docIdUpTo0 = impacts.getDocIdUpTo(0);
|
||||
|
@ -2394,17 +2377,17 @@ public final class CheckIndex implements Closeable {
|
|||
+ lastTarget);
|
||||
}
|
||||
|
||||
for (int level = 1; level < numLevels; ++level) {
|
||||
int docIdUpTo = impacts.getDocIdUpTo(level);
|
||||
int previousDocIdUpTo = impacts.getDocIdUpTo(level - 1);
|
||||
for (int impactsLevel = 1; impactsLevel < numLevels; ++impactsLevel) {
|
||||
int docIdUpTo = impacts.getDocIdUpTo(impactsLevel);
|
||||
int previousDocIdUpTo = impacts.getDocIdUpTo(impactsLevel - 1);
|
||||
if (docIdUpTo < previousDocIdUpTo) {
|
||||
throw new CheckIndexException(
|
||||
"Decreasing return for getDocIdUpTo: level "
|
||||
+ (level - 1)
|
||||
+ (impactsLevel - 1)
|
||||
+ " returned "
|
||||
+ previousDocIdUpTo
|
||||
+ " but level "
|
||||
+ level
|
||||
+ impactsLevel
|
||||
+ " returned "
|
||||
+ docIdUpTo
|
||||
+ " for target "
|
||||
|
@ -2412,10 +2395,10 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
for (int level = 0; level < numLevels; ++level) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(level);
|
||||
for (int impactsLevel = 0; impactsLevel < numLevels; ++impactsLevel) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
|
||||
if (perLevelImpacts.isEmpty()) {
|
||||
throw new CheckIndexException("Got empty list of impacts on level " + level);
|
||||
throw new CheckIndexException("Got empty list of impacts on level " + impactsLevel);
|
||||
}
|
||||
Impact first = perLevelImpacts.get(0);
|
||||
if (first.freq < 1) {
|
||||
|
@ -2433,9 +2416,9 @@ public final class CheckIndex implements Closeable {
|
|||
"Impacts are not ordered or contain dups, got " + previous + " then " + impact);
|
||||
}
|
||||
}
|
||||
if (level > 0) {
|
||||
// Make sure that impacts at level N trigger better scores than an level N-1
|
||||
Iterator<Impact> previousIt = impacts.getImpacts(level - 1).iterator();
|
||||
if (impactsLevel > 0) {
|
||||
// Make sure that impacts at level N trigger better scores than at level N-1
|
||||
Iterator<Impact> previousIt = impacts.getImpacts(impactsLevel - 1).iterator();
|
||||
previous = previousIt.next();
|
||||
Iterator<Impact> it = perLevelImpacts.iterator();
|
||||
Impact impact = it.next();
|
||||
|
@ -2451,9 +2434,9 @@ public final class CheckIndex implements Closeable {
|
|||
"Found impact "
|
||||
+ previous
|
||||
+ " on level "
|
||||
+ (level - 1)
|
||||
+ (impactsLevel - 1)
|
||||
+ " but no impact on level "
|
||||
+ level
|
||||
+ impactsLevel
|
||||
+ " triggers a better score: "
|
||||
+ perLevelImpacts);
|
||||
}
|
||||
|
@ -2470,7 +2453,7 @@ public final class CheckIndex implements Closeable {
|
|||
*/
|
||||
public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream)
|
||||
throws IOException {
|
||||
return testPostings(reader, infoStream, false, true, false);
|
||||
return testPostings(reader, infoStream, false, Level.MIN_LEVEL_FOR_SLOW_CHECKS, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2479,15 +2462,11 @@ public final class CheckIndex implements Closeable {
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.TermIndexStatus testPostings(
|
||||
CodecReader reader,
|
||||
PrintStream infoStream,
|
||||
boolean verbose,
|
||||
boolean doSlowChecks,
|
||||
boolean failFast)
|
||||
CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
|
||||
throws IOException {
|
||||
|
||||
// TODO: we should go and verify term vectors match, if
|
||||
// doSlowChecks is on...
|
||||
// TODO: we should go and verify term vectors match, if the Level is high enough to
|
||||
// include slow checks
|
||||
Status.TermIndexStatus status;
|
||||
final int maxDoc = reader.maxDoc();
|
||||
|
||||
|
@ -2518,7 +2497,7 @@ public final class CheckIndex implements Closeable {
|
|||
false,
|
||||
infoStream,
|
||||
verbose,
|
||||
doSlowChecks);
|
||||
level);
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
|
@ -3661,7 +3640,7 @@ public final class CheckIndex implements Closeable {
|
|||
*/
|
||||
public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream)
|
||||
throws IOException {
|
||||
return testTermVectors(reader, infoStream, false, false, false);
|
||||
return testTermVectors(reader, infoStream, false, Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3670,11 +3649,7 @@ public final class CheckIndex implements Closeable {
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.TermVectorStatus testTermVectors(
|
||||
CodecReader reader,
|
||||
PrintStream infoStream,
|
||||
boolean verbose,
|
||||
boolean doSlowChecks,
|
||||
boolean failFast)
|
||||
CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
|
||||
throws IOException {
|
||||
long startNS = System.nanoTime();
|
||||
final Status.TermVectorStatus status = new Status.TermVectorStatus();
|
||||
|
@ -3687,14 +3662,14 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
PostingsEnum postings = null;
|
||||
|
||||
// Only used if doSlowChecks is true:
|
||||
// Only used if the Level is high enough to include slow checks:
|
||||
PostingsEnum postingsDocs = null;
|
||||
|
||||
final Bits liveDocs = reader.getLiveDocs();
|
||||
|
||||
FieldsProducer postingsFields;
|
||||
// TODO: testTermsIndex
|
||||
if (doSlowChecks) {
|
||||
if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
|
||||
postingsFields = reader.getPostingsReader();
|
||||
if (postingsFields != null) {
|
||||
postingsFields = postingsFields.getMergeInstance();
|
||||
|
@ -3718,8 +3693,7 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
if (tfv != null) {
|
||||
// First run with no deletions:
|
||||
checkFields(
|
||||
tfv, null, 1, fieldInfos, null, false, true, infoStream, verbose, doSlowChecks);
|
||||
checkFields(tfv, null, 1, fieldInfos, null, false, true, infoStream, verbose, level);
|
||||
|
||||
// Only agg stats if the doc is live:
|
||||
final boolean doStats = liveDocs == null || liveDocs.get(j);
|
||||
|
@ -3744,7 +3718,7 @@ public final class CheckIndex implements Closeable {
|
|||
+ " but FieldInfo has storeTermVector=false");
|
||||
}
|
||||
|
||||
if (doSlowChecks) {
|
||||
if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
|
||||
Terms terms = tfv.terms(field);
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
final boolean postingsHasFreq =
|
||||
|
@ -4047,9 +4021,8 @@ public final class CheckIndex implements Closeable {
|
|||
/** Run-time configuration options for CheckIndex commands. */
|
||||
public static class Options {
|
||||
boolean doExorcise = false;
|
||||
boolean doSlowChecks = false;
|
||||
boolean verbose = false;
|
||||
boolean doChecksumsOnly = false;
|
||||
int level = Level.DEFAULT_VALUE;
|
||||
int threadCount;
|
||||
List<String> onlySegments = new ArrayList<>();
|
||||
String indexPath = null;
|
||||
|
@ -4113,6 +4086,42 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Constants describing the valid values of CheckIndex's -level parameter, plus a bounds check. */
|
||||
public static class Level {
|
||||
private Level() {}
|
||||
|
||||
/** Minimum valid level. */
|
||||
public static final int MIN_VALUE = 1;
|
||||
|
||||
/** Maximum valid level. */
|
||||
public static final int MAX_VALUE = 3;
|
||||
|
||||
/** The default level if none is specified. */
|
||||
public static final int DEFAULT_VALUE = MIN_VALUE;
|
||||
|
||||
/** Minimum level required to run checksum checks. */
|
||||
public static final int MIN_LEVEL_FOR_CHECKSUM_CHECKS = 1;
|
||||
|
||||
/** Minimum level required to run integrity checks. */
|
||||
public static final int MIN_LEVEL_FOR_INTEGRITY_CHECKS = 2;
|
||||
|
||||
/** Minimum level required to run slow checks. */
|
||||
public static final int MIN_LEVEL_FOR_SLOW_CHECKS = 3;
|
||||
|
||||
/** Checks that the given level is within the allowed bounds; throws IllegalArgumentException otherwise. */
|
||||
public static void checkIfLevelInBounds(int levelVal) throws IllegalArgumentException {
|
||||
if (levelVal < Level.MIN_VALUE || levelVal > Level.MAX_VALUE) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format(
|
||||
Locale.ROOT,
|
||||
"ERROR: given value: '%d' for -level option is out of bounds. Please use a value from '%d'->'%d'",
|
||||
levelVal,
|
||||
Level.MIN_VALUE,
|
||||
Level.MAX_VALUE));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse command line args into fields
|
||||
*
|
||||
|
@ -4127,15 +4136,29 @@ public final class CheckIndex implements Closeable {
|
|||
int i = 0;
|
||||
while (i < args.length) {
|
||||
String arg = args[i];
|
||||
if ("-fast".equals(arg)) {
|
||||
opts.doChecksumsOnly = true;
|
||||
if ("-level".equals(arg)) {
|
||||
if (i == args.length - 1) {
|
||||
throw new IllegalArgumentException("ERROR: missing value for -level option");
|
||||
}
|
||||
i++;
|
||||
int level = Integer.parseInt(args[i]);
|
||||
Level.checkIfLevelInBounds(level);
|
||||
opts.level = level;
|
||||
} else if ("-fast".equals(arg)) {
|
||||
// Deprecated. Remove in Lucene 11.
|
||||
System.err.println(
|
||||
"-fast is deprecated, use '-level 1' for explicitly verifying file checksums only. This is also now the default "
|
||||
+ "behaviour!");
|
||||
} else if ("-slow".equals(arg)) {
|
||||
// Deprecated. Remove in Lucene 11.
|
||||
System.err.println("-slow is deprecated, use '-level 3' instead for slow checks");
|
||||
opts.level = Level.MIN_LEVEL_FOR_SLOW_CHECKS;
|
||||
} else if ("-exorcise".equals(arg)) {
|
||||
opts.doExorcise = true;
|
||||
} else if ("-crossCheckTermVectors".equals(arg)) {
|
||||
System.err.println("-crossCheckTermVectors is deprecated, use -slow instead");
|
||||
opts.doSlowChecks = true;
|
||||
} else if ("-slow".equals(arg)) {
|
||||
opts.doSlowChecks = true;
|
||||
// Deprecated. Remove in Lucene 11.
|
||||
System.err.println("-crossCheckTermVectors is deprecated, use '-level 3' instead");
|
||||
opts.level = Level.MAX_VALUE;
|
||||
} else if (arg.equals("-verbose")) {
|
||||
opts.verbose = true;
|
||||
} else if (arg.equals("-segment")) {
|
||||
|
@ -4172,11 +4195,13 @@ public final class CheckIndex implements Closeable {
|
|||
if (opts.indexPath == null) {
|
||||
throw new IllegalArgumentException(
|
||||
"\nERROR: index path not specified"
|
||||
+ "\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-slow] [-segment X] [-segment Y] [-threadCount X] [-dir-impl X]\n"
|
||||
+ "\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-level X] [-segment X] [-segment Y] [-threadCount X] [-dir-impl X]\n"
|
||||
+ "\n"
|
||||
+ " -exorcise: actually write a new segments_N file, removing any problematic segments\n"
|
||||
+ " -fast: just verify file checksums, omitting logical integrity checks\n"
|
||||
+ " -slow: do additional slow checks; THIS IS VERY SLOW!\n"
|
||||
+ " -level X: sets the detail level of the check. The higher the value, the more checks are done.\n"
|
||||
+ " 1 - (Default) Checksum checks only.\n"
|
||||
+ " 2 - All level 1 checks + logical integrity checks.\n"
|
||||
+ " 3 - All level 2 checks + slow checks.\n"
|
||||
+ " -codec X: when exorcising, codec to write the new segments_N file with\n"
|
||||
+ " -verbose: print additional details\n"
|
||||
+ " -segment X: only check the specified segments. This can be specified multiple\n"
|
||||
|
@ -4191,7 +4216,8 @@ public final class CheckIndex implements Closeable {
|
|||
+ "If no package is specified the "
|
||||
+ FSDirectory.class.getPackage().getName()
|
||||
+ " package will be used.\n"
|
||||
+ "\n"
|
||||
+ "CheckIndex only verifies file checksums as default.\n"
|
||||
+ "Use -level with value of '2' or higher if you also want to check segment file contents.\n\n"
|
||||
+ "**WARNING**: -exorcise *LOSES DATA*. This should only be used on an emergency basis as it will cause\n"
|
||||
+ "documents (perhaps many) to be permanently removed from the index. Always make\n"
|
||||
+ "a backup copy of your index before running this! Do not run this tool on an index\n"
|
||||
|
@ -4213,10 +4239,6 @@ public final class CheckIndex implements Closeable {
|
|||
throw new IllegalArgumentException("ERROR: cannot specify both -exorcise and -segment");
|
||||
}
|
||||
|
||||
if (opts.doChecksumsOnly && opts.doSlowChecks) {
|
||||
throw new IllegalArgumentException("ERROR: cannot specify both -fast and -slow");
|
||||
}
|
||||
|
||||
return opts;
|
||||
}
|
||||
|
||||
|
@ -4227,8 +4249,7 @@ public final class CheckIndex implements Closeable {
|
|||
* @return 0 iff the index is clean, 1 otherwise
|
||||
*/
|
||||
public int doCheck(Options opts) throws IOException, InterruptedException {
|
||||
setDoSlowChecks(opts.doSlowChecks);
|
||||
setChecksumsOnly(opts.doChecksumsOnly);
|
||||
setLevel(opts.level);
|
||||
setInfoStream(opts.out, opts.verbose);
|
||||
// user provided thread count via command line argument, overriding the default with user
|
||||
// provided value
|
||||
|
|
|
@ -166,7 +166,11 @@ public class TestAllFilesDetectTruncation extends LuceneTestCase {
|
|||
expectThrows(Exception.class, () -> DirectoryReader.open(dirCopy).close());
|
||||
|
||||
// CheckIndex should also fail:
|
||||
expectThrows(Exception.class, () -> TestUtil.checkIndex(dirCopy, true, true, true, null));
|
||||
expectThrows(
|
||||
Exception.class,
|
||||
() ->
|
||||
TestUtil.checkIndex(
|
||||
dirCopy, CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS, true, true, null));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -139,7 +139,9 @@ public class TestCheckIndex extends BaseTestCheckIndex {
|
|||
}
|
||||
|
||||
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output);
|
||||
CheckIndex.Status status =
|
||||
TestUtil.checkIndex(
|
||||
dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, output);
|
||||
|
||||
assertEquals(1, status.segmentInfos.size());
|
||||
|
||||
|
|
|
@ -2714,7 +2714,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
// Make sure CheckIndex includes id output:
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||
CheckIndex checker = new CheckIndex(d);
|
||||
checker.setDoSlowChecks(false);
|
||||
checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS);
|
||||
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false);
|
||||
CheckIndex.Status indexStatus = checker.checkIndex(null);
|
||||
String s = bos.toString(IOUtils.UTF_8);
|
||||
|
|
|
@ -731,7 +731,9 @@ public class TestPointValues extends LuceneTestCase {
|
|||
w.close();
|
||||
|
||||
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output);
|
||||
CheckIndex.Status status =
|
||||
TestUtil.checkIndex(
|
||||
dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, output);
|
||||
assertEquals(1, status.segmentInfos.size());
|
||||
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
|
||||
// total 3 point values were index:
|
||||
|
|
|
@ -121,7 +121,9 @@ public class TestSwappedIndexFiles extends LuceneTestCase {
|
|||
EOFException.class,
|
||||
IndexFormatTooOldException.class,
|
||||
CheckIndex.CheckIndexException.class),
|
||||
() -> TestUtil.checkIndex(dirCopy, true, true, true, null));
|
||||
() ->
|
||||
TestUtil.checkIndex(
|
||||
dirCopy, CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS, true, true, null));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1270,7 +1270,9 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
|
|||
}
|
||||
|
||||
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output);
|
||||
CheckIndex.Status status =
|
||||
TestUtil.checkIndex(
|
||||
dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, output);
|
||||
assertEquals(1, status.segmentInfos.size());
|
||||
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
|
||||
// total 3 vector values were indexed:
|
||||
|
|
|
@ -59,6 +59,7 @@ public class BaseTestCheckIndex extends LuceneTestCase {
|
|||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||
CheckIndex checker = new CheckIndex(dir);
|
||||
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
|
||||
checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS);
|
||||
if (VERBOSE) checker.setInfoStream(System.out);
|
||||
CheckIndex.Status indexStatus = checker.checkIndex();
|
||||
if (indexStatus.clean == false) {
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.tests.store;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.CheckIndex;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FilterDirectory;
|
||||
|
@ -29,7 +30,7 @@ import org.apache.lucene.tests.util.TestUtil;
|
|||
public abstract class BaseDirectoryWrapper extends FilterDirectory {
|
||||
|
||||
private boolean checkIndexOnClose = true;
|
||||
private boolean doSlowChecksOnClose = true;
|
||||
private int levelForCheckOnClose = CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS;
|
||||
protected volatile boolean isOpen = true;
|
||||
|
||||
protected BaseDirectoryWrapper(Directory delegate) {
|
||||
|
@ -41,7 +42,7 @@ public abstract class BaseDirectoryWrapper extends FilterDirectory {
|
|||
if (isOpen) {
|
||||
isOpen = false;
|
||||
if (checkIndexOnClose && DirectoryReader.indexExists(this)) {
|
||||
TestUtil.checkIndex(this, doSlowChecksOnClose);
|
||||
TestUtil.checkIndex(this, levelForCheckOnClose);
|
||||
}
|
||||
}
|
||||
super.close();
|
||||
|
@ -61,10 +62,15 @@ public abstract class BaseDirectoryWrapper extends FilterDirectory {
|
|||
}
|
||||
|
||||
public void setCrossCheckTermVectorsOnClose(boolean value) {
|
||||
this.doSlowChecksOnClose = value;
|
||||
// If true, we are enabling slow checks.
|
||||
if (value == true) {
|
||||
this.levelForCheckOnClose = CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS;
|
||||
} else {
|
||||
this.levelForCheckOnClose = CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean getCrossCheckTermVectorsOnClose() {
|
||||
return doSlowChecksOnClose;
|
||||
public int getLevelForCheckOnClose() {
|
||||
return levelForCheckOnClose;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -906,7 +906,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
|
|||
// TestUtil#checkIndex checks segment concurrently using another thread, but making
|
||||
// call back to synchronized methods such as MockDirectoryWrapper#fileLength.
|
||||
// Hence passing concurrent = false to this method to turn off concurrent checks.
|
||||
TestUtil.checkIndex(this, getCrossCheckTermVectorsOnClose(), true, false, null);
|
||||
TestUtil.checkIndex(this, getLevelForCheckOnClose(), true, false, null);
|
||||
}
|
||||
|
||||
// TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles
|
||||
|
|
|
@ -306,12 +306,11 @@ public final class TestUtil {
|
|||
* thrown; else, true is returned.
|
||||
*/
|
||||
public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
|
||||
return checkIndex(dir, true);
|
||||
return checkIndex(dir, CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS);
|
||||
}
|
||||
|
||||
public static CheckIndex.Status checkIndex(Directory dir, boolean doSlowChecks)
|
||||
throws IOException {
|
||||
return checkIndex(dir, doSlowChecks, false, true, null);
|
||||
public static CheckIndex.Status checkIndex(Directory dir, int level) throws IOException {
|
||||
return checkIndex(dir, level, false, true, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -319,11 +318,7 @@ public final class TestUtil {
|
|||
* moving on to other fields/segments to look for any other corruption.
|
||||
*/
|
||||
public static CheckIndex.Status checkIndex(
|
||||
Directory dir,
|
||||
boolean doSlowChecks,
|
||||
boolean failFast,
|
||||
boolean concurrent,
|
||||
ByteArrayOutputStream output)
|
||||
Directory dir, int level, boolean failFast, boolean concurrent, ByteArrayOutputStream output)
|
||||
throws IOException {
|
||||
if (output == null) {
|
||||
output = new ByteArrayOutputStream(1024);
|
||||
|
@ -332,7 +327,7 @@ public final class TestUtil {
|
|||
// some tests e.g. exception tests become much more complicated if they have to close the writer
|
||||
try (CheckIndex checker =
|
||||
new CheckIndex(dir, NoLockFactory.INSTANCE.obtainLock(dir, "bogus"))) {
|
||||
checker.setDoSlowChecks(doSlowChecks);
|
||||
checker.setLevel(level);
|
||||
checker.setFailFast(failFast);
|
||||
checker.setInfoStream(new PrintStream(output, false, IOUtils.UTF_8), false);
|
||||
if (concurrent) {
|
||||
|
@ -361,11 +356,11 @@ public final class TestUtil {
|
|||
*/
|
||||
public static void checkReader(IndexReader reader) throws IOException {
|
||||
for (LeafReaderContext context : reader.leaves()) {
|
||||
checkReader(context.reader(), true);
|
||||
checkReader(context.reader(), CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS);
|
||||
}
|
||||
}
|
||||
|
||||
public static void checkReader(LeafReader reader, boolean doSlowChecks) throws IOException {
|
||||
public static void checkReader(LeafReader reader, int level) throws IOException {
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||
PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8);
|
||||
|
||||
|
@ -379,9 +374,9 @@ public final class TestUtil {
|
|||
CheckIndex.testLiveDocs(codecReader, infoStream, true);
|
||||
CheckIndex.testFieldInfos(codecReader, infoStream, true);
|
||||
CheckIndex.testFieldNorms(codecReader, infoStream, true);
|
||||
CheckIndex.testPostings(codecReader, infoStream, false, doSlowChecks, true);
|
||||
CheckIndex.testPostings(codecReader, infoStream, false, level, true);
|
||||
CheckIndex.testStoredFields(codecReader, infoStream, true);
|
||||
CheckIndex.testTermVectors(codecReader, infoStream, false, doSlowChecks, true);
|
||||
CheckIndex.testTermVectors(codecReader, infoStream, false, level, true);
|
||||
CheckIndex.testDocValues(codecReader, infoStream, true);
|
||||
CheckIndex.testPoints(codecReader, infoStream, true);
|
||||
|
||||
|
|
Loading…
Reference in New Issue