LUCENE-5924: rename CheckIndex -fix option and add more warnings about what it actually does

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1628579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-10-01 00:12:56 +00:00
parent 04a5e3a408
commit d67688973d
2 changed files with 33 additions and 30 deletions

View File

@ -139,6 +139,9 @@ API Changes
* LUCENE-5938: Removed MultiTermQuery.ConstantScoreAutoRewrite as * LUCENE-5938: Removed MultiTermQuery.ConstantScoreAutoRewrite as
MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE is usually better. (Adrien Grand) MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE is usually better. (Adrien Grand)
* LUCENE-5924: Rename CheckIndex -fix option to -exorcise. This option does not
actually fix the index, it just drops data. (Robert Muir)
Bug Fixes Bug Fixes
* LUCENE-5650: Enforce read-only access to any path outside the temporary * LUCENE-5650: Enforce read-only access to any path outside the temporary

View File

@ -60,7 +60,7 @@ import org.apache.lucene.util.Version;
* index it can take quite a long time to run. * index it can take quite a long time to run.
* *
* @lucene.experimental Please make a complete backup of your * @lucene.experimental Please make a complete backup of your
* index before using this to fix your index! * index before using this to exorcise corrupted documents from your index!
*/ */
public class CheckIndex { public class CheckIndex {
@ -111,7 +111,7 @@ public class CheckIndex {
/** /**
* SegmentInfos instance containing only segments that * SegmentInfos instance containing only segments that
* had no problems (this is used with the {@link CheckIndex#fixIndex} * had no problems (this is used with the {@link CheckIndex#exorciseIndex}
* method to repair the index. * method to repair the index.
*/ */
SegmentInfos newSegments; SegmentInfos newSegments;
@ -677,7 +677,7 @@ public class CheckIndex {
} }
msg(infoStream, "FAILED"); msg(infoStream, "FAILED");
String comment; String comment;
comment = "fixIndex() would remove reference to this segment"; comment = "exorciseIndex() would remove reference to this segment";
msg(infoStream, " WARNING: " + comment + "; full exception:"); msg(infoStream, " WARNING: " + comment + "; full exception:");
if (infoStream != null) if (infoStream != null)
t.printStackTrace(infoStream); t.printStackTrace(infoStream);
@ -2011,9 +2011,9 @@ public class CheckIndex {
* *
* <p><b>WARNING</b>: Make sure you only call this when the * <p><b>WARNING</b>: Make sure you only call this when the
* index is not opened by any writer. */ * index is not opened by any writer. */
public void fixIndex(Status result) throws IOException { public void exorciseIndex(Status result) throws IOException {
if (result.partial) if (result.partial)
throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)"); throw new IllegalArgumentException("can only exorcise an index that was fully checked (this status checked a subset of segments)");
result.newSegments.changed(); result.newSegments.changed();
result.newSegments.commit(result.dir); result.newSegments.commit(result.dir);
} }
@ -2030,31 +2030,31 @@ public class CheckIndex {
return assertsOn; return assertsOn;
} }
/** Command-line interface to check and fix an index. /** Command-line interface to check and exorcise corrupt segments from an index.
<p> <p>
Run it like this: Run it like this:
<pre> <pre>
java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-verbose] [-segment X] [-segment Y] java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-verbose] [-segment X] [-segment Y]
</pre> </pre>
<ul> <ul>
<li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments <li><code>-exorcise</code>: actually write a new segments_N file, removing any problematic segments. *LOSES DATA*
<li><code>-segment X</code>: only check the specified <li><code>-segment X</code>: only check the specified
segment(s). This can be specified multiple times, segment(s). This can be specified multiple times,
to check more than one segment, eg <code>-segment _2 to check more than one segment, eg <code>-segment _2
-segment _a</code>. You can't use this with the -fix -segment _a</code>. You can't use this with the -exorcise
option. option.
</ul> </ul>
<p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause <p><b>WARNING</b>: <code>-exorcise</code> should only be used on an emergency basis as it will cause
documents (perhaps many) to be permanently removed from the index. Always make documents (perhaps many) to be permanently removed from the index. Always make
a backup copy of your index before running this! Do not run this tool on an index a backup copy of your index before running this! Do not run this tool on an index
that is actively being written to. You have been warned! that is actively being written to. You have been warned!
<p> Run without -fix, this tool will open the index, report version information <p> Run without -exorcise, this tool will open the index, report version information
and report any exceptions it hits and what action it would take if -fix were and report any exceptions it hits and what action it would take if -exorcise were
specified. With -fix, this tool will remove any segments that have issues and specified. With -exorcise, this tool will remove any segments that have issues and
write a new segments_N file. This means all documents contained in the affected write a new segments_N file. This means all documents contained in the affected
segments will be removed. segments will be removed.
@ -2064,7 +2064,7 @@ public class CheckIndex {
*/ */
public static void main(String[] args) throws IOException, InterruptedException { public static void main(String[] args) throws IOException, InterruptedException {
boolean doFix = false; boolean doExorcise = false;
boolean doCrossCheckTermVectors = false; boolean doCrossCheckTermVectors = false;
boolean verbose = false; boolean verbose = false;
List<String> onlySegments = new ArrayList<>(); List<String> onlySegments = new ArrayList<>();
@ -2073,8 +2073,8 @@ public class CheckIndex {
int i = 0; int i = 0;
while(i < args.length) { while(i < args.length) {
String arg = args[i]; String arg = args[i];
if ("-fix".equals(arg)) { if ("-exorcise".equals(arg)) {
doFix = true; doExorcise = true;
} else if ("-crossCheckTermVectors".equals(arg)) { } else if ("-crossCheckTermVectors".equals(arg)) {
doCrossCheckTermVectors = true; doCrossCheckTermVectors = true;
} else if (arg.equals("-verbose")) { } else if (arg.equals("-verbose")) {
@ -2105,26 +2105,26 @@ public class CheckIndex {
if (indexPath == null) { if (indexPath == null) {
System.out.println("\nERROR: index path not specified"); System.out.println("\nERROR: index path not specified");
System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" + System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
"\n" + "\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" + " -exorcise: actually write a new segments_N file, removing any problematic segments\n" +
" -crossCheckTermVectors: verifies that term vectors match postings; THIS IS VERY SLOW!\n" + " -crossCheckTermVectors: verifies that term vectors match postings; THIS IS VERY SLOW!\n" +
" -codec X: when fixing, codec to write the new segments_N file with\n" + " -codec X: when exorcising, codec to write the new segments_N file with\n" +
" -verbose: print additional details\n" + " -verbose: print additional details\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" + " -segment X: only check the specified segments. This can be specified multiple\n" +
" times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
" You can't use this with the -fix option\n" + " You can't use this with the -exorcise option\n" +
" -dir-impl X: use a specific " + FSDirectory.class.getSimpleName() + " implementation. " + " -dir-impl X: use a specific " + FSDirectory.class.getSimpleName() + " implementation. " +
"If no package is specified the " + FSDirectory.class.getPackage().getName() + " package will be used.\n" + "If no package is specified the " + FSDirectory.class.getPackage().getName() + " package will be used.\n" +
"\n" + "\n" +
"**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "**WARNING**: -exorcise *LOSES DATA*. This should only be used on an emergency basis as it will cause\n" +
"documents (perhaps many) to be permanently removed from the index. Always make\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" +
"a backup copy of your index before running this! Do not run this tool on an index\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" +
"that is actively being written to. You have been warned!\n" + "that is actively being written to. You have been warned!\n" +
"\n" + "\n" +
"Run without -fix, this tool will open the index, report version information\n" + "Run without -exorcise, this tool will open the index, report version information\n" +
"and report any exceptions it hits and what action it would take if -fix were\n" + "and report any exceptions it hits and what action it would take if -exorcise were\n" +
"specified. With -fix, this tool will remove any segments that have issues and\n" + "specified. With -exorcise, this tool will remove any segments that have issues and\n" +
"write a new segments_N file. This means all documents contained in the affected\n" + "write a new segments_N file. This means all documents contained in the affected\n" +
"segments will be removed.\n" + "segments will be removed.\n" +
"\n" + "\n" +
@ -2138,8 +2138,8 @@ public class CheckIndex {
if (onlySegments.size() == 0) if (onlySegments.size() == 0)
onlySegments = null; onlySegments = null;
else if (doFix) { else if (doExorcise) {
System.out.println("ERROR: cannot specify both -fix and -segment"); System.out.println("ERROR: cannot specify both -exorcise and -segment");
System.exit(1); System.exit(1);
} }
@ -2168,17 +2168,17 @@ public class CheckIndex {
} }
if (!result.clean) { if (!result.clean) {
if (!doFix) { if (!doExorcise) {
System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n"); System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -exorcise were specified\n");
} else { } else {
System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n"); System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. YOU WILL LOSE DATA. THIS IS YOUR LAST CHANCE TO CTRL+C!");
for(int s=0;s<5;s++) { for(int s=0;s<5;s++) {
Thread.sleep(1000); Thread.sleep(1000);
System.out.println(" " + (5-s) + "..."); System.out.println(" " + (5-s) + "...");
} }
System.out.println("Writing..."); System.out.println("Writing...");
checker.fixIndex(result); checker.exorciseIndex(result);
System.out.println("OK"); System.out.println("OK");
System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\""); System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
} }