diff --git a/src/java/org/apache/lucene/index/CheckIndex.java b/src/java/org/apache/lucene/index/CheckIndex.java index 43441613baa..a43faf5415b 100644 --- a/src/java/org/apache/lucene/index/CheckIndex.java +++ b/src/java/org/apache/lucene/index/CheckIndex.java @@ -29,21 +29,166 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.ArrayList; +import org.apache.lucene.document.Fieldable; // for javadoc /** - * Basic tool to check the health of an index and write a - * new segments file that removes reference to problematic - * segments. There are many more checks that this tool - * could do but does not yet, eg: reconstructing a segments - * file by looking for all loadable segments (if no segments - * file is found), removing specifically specified segments, - * listing files that exist but are not referenced, etc. + * Basic tool and API to check the health of an index and + * write a new segments file that removes reference to + * problematic segments. + * + *
As this tool checks every byte in the index, on a large + * index it can take quite a long time to run. + * + *
WARNING: this tool and API is new and + * experimental and is subject to suddenly change in the + * next release. Please make a complete backup of your + * index before using this to fix your index! */ - public class CheckIndex { + /** Default PrintStream for all CheckIndex instances. + * @deprecated Use {@link #setInfoStream} per instance, + * instead. */ public static PrintStream out = null; + private PrintStream infoStream; + private Directory dir; + + /** + * Returned from {@link #checkIndex()} detailing the health and status of the index. + * + *
WARNING: this API is new and experimental and
+ * may change suddenly in the next release.
+ **/
+
+ public static class Status {
+
+ /** True if no problems were found with the index. */
+ public boolean clean;
+
+ /** True if we were unable to locate and load the segments_N file. */
+ public boolean missingSegments;
+
+ /** True if we were unable to open the segments_N file. */
+ public boolean cantOpenSegments;
+
+ /** True if we were unable to read the version number from segments_N file. */
+ public boolean missingSegmentVersion;
+
+ /** Name of latest segments_N file in the index. */
+ public String segmentsFileName;
+
+ /** Number of segments in the index. */
+ public int numSegments;
+
+ /** String description of the version of the index. */
+ public String segmentFormat;
+
+ /** Empty unless you passed a specific list of segments to check.
+ * @see CheckIndex#checkIndex(List) */
+ public List/* As this method checks every byte in the index, on a large
+ * index it can take quite a long time to run.
+ *
+ * WARNING: make sure
+ * you only call this when the index is not opened by any
+ * writer. */
+ public Status checkIndex() throws IOException {
+ return checkIndex(null);
+ }
+
+ /** Returns a {@link Status} instance detailing
+ * the state of the index.
+ *
+ * @param onlySegments list of specific segment names to check
+ *
+ * As this method checks every byte in the specified
+ * segments, on a large index it can take quite a long
+ * time to run.
+ *
+ * WARNING: make sure
+ * you only call this when the index is not opened by any
+ * writer. */
+ public Status checkIndex(List onlySegments) throws IOException {
NumberFormat nf = NumberFormat.getInstance();
SegmentInfos sis = new SegmentInfos();
- CheckIndexStatus result = new CheckIndexStatus();
+ Status result = new Status();
result.dir = dir;
try {
sis.read(dir);
} catch (Throwable t) {
msg("ERROR: could not read any segments file in directory");
result.missingSegments = true;
- t.printStackTrace(out);
+ if (infoStream != null)
+ t.printStackTrace(infoStream);
return result;
}
@@ -89,7 +271,8 @@ public class CheckIndex {
input = dir.openInput(segmentsFileName);
} catch (Throwable t) {
msg("ERROR: could not open segments file in directory");
- t.printStackTrace(out);
+ if (infoStream != null)
+ t.printStackTrace(infoStream);
result.cantOpenSegments = true;
return result;
}
@@ -98,7 +281,8 @@ public class CheckIndex {
format = input.readInt();
} catch (Throwable t) {
msg("ERROR: could not read segment file version in directory");
- t.printStackTrace(out);
+ if (infoStream != null)
+ t.printStackTrace(infoStream);
result.missingSegmentVersion = true;
return result;
} finally {
@@ -138,10 +322,13 @@ public class CheckIndex {
result.segmentFormat = sFormat;
if (onlySegments != null) {
- out.print("\nChecking only these segments:");
+ result.partial = true;
+ if (infoStream != null)
+ infoStream.print("\nChecking only these segments:");
Iterator it = onlySegments.iterator();
while (it.hasNext()) {
- out.print(" " + it.next());
+ if (infoStream != null)
+ infoStream.print(" " + it.next());
}
result.segmentsChecked.addAll(onlySegments);
msg(":");
@@ -161,7 +348,7 @@ public class CheckIndex {
final SegmentInfo info = sis.info(i);
if (onlySegments != null && !onlySegments.contains(info.name))
continue;
- CheckIndexStatus.SegmentInfoStatus segInfoStat = new CheckIndexStatus.SegmentInfoStatus();
+ Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
result.segmentInfos.add(segInfoStat);
msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
segInfoStat.name = info.name;
@@ -200,9 +387,9 @@ public class CheckIndex {
msg(" has deletions [delFileName=" + delFileName + "]");
segInfoStat.hasDeletions = true;
segInfoStat.deletionsFileName = delFileName;
-
}
- out.print(" test: open reader.........");
+ if (infoStream != null)
+ infoStream.print(" test: open reader.........");
reader = SegmentReader.get(info);
final int numDocs = reader.numDocs();
toLoseDocCount = numDocs;
@@ -219,7 +406,8 @@ public class CheckIndex {
msg("OK");
}
- out.print(" test: fields, norms.......");
+ if (infoStream != null)
+ infoStream.print(" test: fields, norms.......");
Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
Iterator it = fieldNames.iterator();
while(it.hasNext()) {
@@ -231,7 +419,8 @@ public class CheckIndex {
}
msg("OK [" + fieldNames.size() + " fields]");
segInfoStat.numFields = fieldNames.size();
- out.print(" test: terms, freq, prox...");
+ if (infoStream != null)
+ infoStream.print(" test: terms, freq, prox...");
final TermEnum termEnum = reader.terms();
final TermPositions termPositions = reader.termPositions();
@@ -288,7 +477,8 @@ public class CheckIndex {
msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
- out.print(" test: stored fields.......");
+ if (infoStream != null)
+ infoStream.print(" test: stored fields.......");
int docCount = 0;
long totFields = 0;
for(int j=0;j WARNING: this writes a
+ * new segments file into the index, effectively removing
+ * all documents in broken segments from the index.
+ * BE CAREFUL.
+ *
+ * WARNING: Make sure you only call this when the
+ * index is not opened by any writer. */
+ public void fixIndex(Status result) throws IOException {
+ if (result.partial)
+ throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
result.newSegments.commit(result.dir);
}
- static boolean assertsOn;
+ private static boolean assertsOn;
private static boolean testAsserts() {
assertsOn = true;
return true;
}
- private static void msg(String msg) {
- if (out != null) {
- out.println(msg);
- }
+ private static boolean assertsOn() {
+ assert testAsserts();
+ return assertsOn;
}
- public static void main(String[] args) throws Throwable {
+ /** Command-line interface to check and fix an index.
+
+
+ Run it like this:
+ WARNING: Run without -fix, this tool will open the index, report version information
+ and report any exceptions it hits and what action it would take if -fix were
+ specified. With -fix, this tool will remove any segments that have issues and
+ write a new segments_N file. This means all documents contained in the affected
+ segments will be removed.
+
+
+ This tool exits with exit code 1 if the index cannot be opened or has any
+ corruption, else 0.
+ */
+ public static void main(String[] args) throws IOException {
boolean doFix = false;
List onlySegments = new ArrayList();
@@ -380,14 +610,14 @@ public class CheckIndex {
i++;
} else if (args[i].equals("-segment")) {
if (i == args.length-1) {
- msg("ERROR: missing name for -segment option");
+ System.out.println("ERROR: missing name for -segment option");
System.exit(1);
}
onlySegments.add(args[i+1]);
i += 2;
} else {
if (indexPath != null) {
- msg("ERROR: unexpected extra argument '" + args[i] + "'");
+ System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
System.exit(1);
}
indexPath = args[i];
@@ -396,8 +626,8 @@ public class CheckIndex {
}
if (indexPath == null) {
- msg("\nERROR: index path not specified");
- msg("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
+ System.out.println("\nERROR: index path not specified");
+ System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
"\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" +
@@ -415,40 +645,42 @@ public class CheckIndex {
"write a new segments_N file. This means all documents contained in the affected\n" +
"segments will be removed.\n" +
"\n" +
- "This tool exits with exit code 1 if the index cannot be opened or has has any\n" +
+ "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
"corruption, else 0.\n");
System.exit(1);
}
+ if (!assertsOn())
+ System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
+
if (onlySegments.size() == 0)
onlySegments = null;
else if (doFix) {
- msg("ERROR: cannot specify both -fix and -segment");
+ System.out.println("ERROR: cannot specify both -fix and -segment");
System.exit(1);
}
- assert testAsserts();
- if (!assertsOn)
- msg("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene', so assertions are enabled");
-
- msg("\nOpening index @ " + indexPath + "\n");
+ System.out.println("\nOpening index @ " + indexPath + "\n");
Directory dir = null;
try {
dir = FSDirectory.getDirectory(indexPath);
} catch (Throwable t) {
- msg("ERROR: could not open directory \"" + indexPath + "\"; exiting");
- t.printStackTrace(out);
+ System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
+ t.printStackTrace(System.out);
System.exit(1);
}
- CheckIndexStatus result = check(dir, doFix, onlySegments);
+ CheckIndex checker = new CheckIndex(dir);
+ checker.setInfoStream(System.out);
+
+ Status result = checker.checkIndex(onlySegments);
if (!result.clean) {
- if (!doFix){
- msg("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
+ if (!doFix) {
+ System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
} else {
- msg("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
- msg("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
+ System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
+ System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
for(int s=0;s<5;s++) {
try {
Thread.sleep(1000);
@@ -457,15 +689,15 @@ public class CheckIndex {
s--;
continue;
}
- msg(" " + (5-i) + "...");
+ System.out.println(" " + (5-s) + "...");
}
- msg("Writing...");
- CheckIndex.fix(result);
+ System.out.println("Writing...");
+ checker.fixIndex(result);
+ System.out.println("OK");
+ System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
}
- msg("OK");
- msg("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
}
- msg("");
+ System.out.println("");
final int exitCode;
if (result != null && result.clean == true)
diff --git a/src/java/org/apache/lucene/index/CheckIndexStatus.java b/src/java/org/apache/lucene/index/CheckIndexStatus.java
deleted file mode 100644
index 3202d2c2fe6..00000000000
--- a/src/java/org/apache/lucene/index/CheckIndexStatus.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.store.Directory;
-
-import java.util.List;
-import java.util.ArrayList;
-
-
-/**
- *
- *
- **/
-public class CheckIndexStatus {
-
- public boolean clean;
-
-
- public boolean missingSegments;
- public boolean cantOpenSegments;
- public boolean missingSegmentVersion;
-
-
- public String segmentsFileName;
- public int numSegments;
- public String segmentFormat;
- public List/*
+ java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+
+
+
+
+ -fix
: actually write a new segments_N file, removing any problematic segments
+
+ -segment X
: only check the specified
+ segment(s). This can be specified multiple times,
+ to check more than one segment, eg -segment _2
+ -segment _a
. You can't use this with the -fix
+ option.
+ -fix
should only be used on an emergency basis as it will cause
+ documents (perhaps many) to be permanently removed from the index. Always make
+ a backup copy of your index before running this! Do not run this tool on an index
+ that is actively being written to. You have been warned!
+
+