mirror of https://github.com/apache/lucene.git
LUCENE-1402: make CheckIndex back-compatible again; improve programmatic access
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@698909 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
798f4676ed
commit
d8a60238e3
|
@ -29,21 +29,166 @@ import java.util.Collection;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import org.apache.lucene.document.Fieldable; // for javadoc
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic tool to check the health of an index and write a
|
* Basic tool and API to check the health of an index and
|
||||||
* new segments file that removes reference to problematic
|
* write a new segments file that removes reference to
|
||||||
* segments. There are many more checks that this tool
|
* problematic segments.
|
||||||
* could do but does not yet, eg: reconstructing a segments
|
*
|
||||||
* file by looking for all loadable segments (if no segments
|
* <p>As this tool checks every byte in the index, on a large
|
||||||
* file is found), removing specifically specified segments,
|
* index it can take quite a long time to run.
|
||||||
* listing files that exist but are not referenced, etc.
|
*
|
||||||
|
* <p><b>WARNING</b>: this tool and API is new and
|
||||||
|
* experimental and is subject to suddenly change in the
|
||||||
|
* next release. Please make a complete backup of your
|
||||||
|
* index before using this to fix your index!
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class CheckIndex {
|
public class CheckIndex {
|
||||||
|
|
||||||
|
/** Default PrintStream for all CheckIndex instances.
|
||||||
|
* @deprecated Use {@link #setInfoStream} per instance,
|
||||||
|
* instead. */
|
||||||
public static PrintStream out = null;
|
public static PrintStream out = null;
|
||||||
|
|
||||||
|
private PrintStream infoStream;
|
||||||
|
private Directory dir;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returned from {@link #checkIndex()} detailing the health and status of the index.
|
||||||
|
*
|
||||||
|
* <p><b>WARNING</b>: this API is new and experimental and is
|
||||||
|
* subject to suddenly change in the next release.
|
||||||
|
**/
|
||||||
|
|
||||||
|
public static class Status {
|
||||||
|
|
||||||
|
/** True if no problems were found with the index. */
|
||||||
|
public boolean clean;
|
||||||
|
|
||||||
|
/** True if we were unable to locate and load the segments_N file. */
|
||||||
|
public boolean missingSegments;
|
||||||
|
|
||||||
|
/** True if we were unable to open the segments_N file. */
|
||||||
|
public boolean cantOpenSegments;
|
||||||
|
|
||||||
|
/** True if we were unable to read the version number from segments_N file. */
|
||||||
|
public boolean missingSegmentVersion;
|
||||||
|
|
||||||
|
/** Name of latest segments_N file in the index. */
|
||||||
|
public String segmentsFileName;
|
||||||
|
|
||||||
|
/** Number of segments in the index. */
|
||||||
|
public int numSegments;
|
||||||
|
|
||||||
|
/** String description of the version of the index. */
|
||||||
|
public String segmentFormat;
|
||||||
|
|
||||||
|
/** Empty unless you passed specific segments list to check as optional 3rd argument.
|
||||||
|
* @see CheckIndex#checkIndex(List) */
|
||||||
|
public List/*<String>*/ segmentsChecked = new ArrayList();
|
||||||
|
|
||||||
|
/** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
|
||||||
|
public boolean toolOutOfDate;
|
||||||
|
|
||||||
|
/** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
|
||||||
|
public List/*<SegmentInfoStatus*/ segmentInfos = new ArrayList();
|
||||||
|
|
||||||
|
/** Directory index is in. */
|
||||||
|
public Directory dir;
|
||||||
|
|
||||||
|
/** SegmentInfos instance containing only segments that
|
||||||
|
* had no problems (this is used with the {@link
|
||||||
|
* CheckIndex#fix} method to repair the index. */
|
||||||
|
SegmentInfos newSegments;
|
||||||
|
|
||||||
|
/** How many documents will be lost to bad segments. */
|
||||||
|
public int totLoseDocCount;
|
||||||
|
|
||||||
|
/** How many bad segments were found. */
|
||||||
|
public int numBadSegments;
|
||||||
|
|
||||||
|
/** True if we checked only specific segments ({@link
|
||||||
|
* #checkIndex(List)}) was called with non-null
|
||||||
|
* argument). */
|
||||||
|
public boolean partial;
|
||||||
|
|
||||||
|
/** Holds the status of each segment in the index.
|
||||||
|
* See {@link #segmentInfos}.
|
||||||
|
*
|
||||||
|
* <p><b>WARNING</b>: this API is new and experimental and is
|
||||||
|
* subject to suddenly change in the next release.
|
||||||
|
*/
|
||||||
|
public static class SegmentInfoStatus {
|
||||||
|
/** Name of the segment. */
|
||||||
|
public String name;
|
||||||
|
|
||||||
|
/** Document count (does not take deletions into account). */
|
||||||
|
public int docCount;
|
||||||
|
|
||||||
|
/** True if segment is compound file format. */
|
||||||
|
public boolean compound;
|
||||||
|
|
||||||
|
/** Number of files referenced by this segment. */
|
||||||
|
public int numFiles;
|
||||||
|
|
||||||
|
/** Net size (MB) of the files referenced by this
|
||||||
|
* segment. */
|
||||||
|
public double sizeMB;
|
||||||
|
|
||||||
|
/** Doc store offset, if this segment shares the doc
|
||||||
|
* store files (stored fields and term vectors) with
|
||||||
|
* other segments. This is -1 if it does not share. */
|
||||||
|
public int docStoreOffset = -1;
|
||||||
|
|
||||||
|
/** String of the shared doc store segment, or null if
|
||||||
|
* this segment does not share the doc store files. */
|
||||||
|
public String docStoreSegment;
|
||||||
|
|
||||||
|
/** True if the shared doc store files are compound file
|
||||||
|
* format. */
|
||||||
|
public boolean docStoreCompoundFile;
|
||||||
|
|
||||||
|
/** True if this segment has pending deletions. */
|
||||||
|
public boolean hasDeletions;
|
||||||
|
|
||||||
|
/** Name of the current deletions file name. */
|
||||||
|
public String deletionsFileName;
|
||||||
|
|
||||||
|
/** Number of deleted documents. */
|
||||||
|
public int numDeleted;
|
||||||
|
|
||||||
|
/** True if we were able to open a SegmentReader on this
|
||||||
|
* segment. */
|
||||||
|
public boolean openReaderPassed;
|
||||||
|
|
||||||
|
/** Number of fields in this segment. */
|
||||||
|
int numFields;
|
||||||
|
|
||||||
|
/** True if at least one of the fields in this segment
|
||||||
|
* does not omitTf.
|
||||||
|
* @see Fieldable#setOmitTf */
|
||||||
|
public boolean hasProx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a new CheckIndex on the directory. */
|
||||||
|
public CheckIndex(Directory dir) {
|
||||||
|
this.dir = dir;
|
||||||
|
infoStream = out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Set infoStream where messages should go. If null, no
|
||||||
|
* messages are printed */
|
||||||
|
public void setInfoStream(PrintStream out) {
|
||||||
|
infoStream = out;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void msg(String msg) {
|
||||||
|
if (infoStream != null)
|
||||||
|
infoStream.println(msg);
|
||||||
|
}
|
||||||
|
|
||||||
private static class MySegmentTermDocs extends SegmentTermDocs {
|
private static class MySegmentTermDocs extends SegmentTermDocs {
|
||||||
|
|
||||||
int delCount;
|
int delCount;
|
||||||
|
@ -62,23 +207,60 @@ public class CheckIndex {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if index is clean, else false.*/
|
/** Returns true if index is clean, else false.
|
||||||
public static CheckIndexStatus check(Directory dir, boolean doFix) throws IOException {
|
* @deprecated Please instantiate a CheckIndex and then use {@link #checkIndex()} instead */
|
||||||
|
public static boolean check(Directory dir, boolean doFix) throws IOException {
|
||||||
return check(dir, doFix, null);
|
return check(dir, doFix, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if index is clean, else false.*/
|
/** Returns true if index is clean, else false.
|
||||||
public static CheckIndexStatus check(Directory dir, boolean doFix, List onlySegments) throws IOException {
|
* @deprecated Please instantiate a CheckIndex and then use {@link #checkIndex(List)} instead */
|
||||||
|
public static boolean check(Directory dir, boolean doFix, List onlySegments) throws IOException {
|
||||||
|
CheckIndex checker = new CheckIndex(dir);
|
||||||
|
Status status = checker.checkIndex(onlySegments);
|
||||||
|
if (doFix && !status.clean)
|
||||||
|
checker.fixIndex(status);
|
||||||
|
|
||||||
|
return status.clean;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a {@link Status} instance detailing
|
||||||
|
* the state of the index.
|
||||||
|
*
|
||||||
|
* <p>As this method checks every byte in the index, on a large
|
||||||
|
* index it can take quite a long time to run.
|
||||||
|
*
|
||||||
|
* <p><b>WARNING</b>: make sure
|
||||||
|
* you only call this when the index is not opened by any
|
||||||
|
* writer. */
|
||||||
|
public Status checkIndex() throws IOException {
|
||||||
|
return checkIndex(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a {@link Status} instance detailing
|
||||||
|
* the state of the index.
|
||||||
|
*
|
||||||
|
* @param onlySegments list of specific segment names to check
|
||||||
|
*
|
||||||
|
* <p>As this method checks every byte in the specified
|
||||||
|
* segments, on a large index it can take quite a long
|
||||||
|
* time to run.
|
||||||
|
*
|
||||||
|
* <p><b>WARNING</b>: make sure
|
||||||
|
* you only call this when the index is not opened by any
|
||||||
|
* writer. */
|
||||||
|
public Status checkIndex(List onlySegments) throws IOException {
|
||||||
NumberFormat nf = NumberFormat.getInstance();
|
NumberFormat nf = NumberFormat.getInstance();
|
||||||
SegmentInfos sis = new SegmentInfos();
|
SegmentInfos sis = new SegmentInfos();
|
||||||
CheckIndexStatus result = new CheckIndexStatus();
|
Status result = new Status();
|
||||||
result.dir = dir;
|
result.dir = dir;
|
||||||
try {
|
try {
|
||||||
sis.read(dir);
|
sis.read(dir);
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
msg("ERROR: could not read any segments file in directory");
|
msg("ERROR: could not read any segments file in directory");
|
||||||
result.missingSegments = true;
|
result.missingSegments = true;
|
||||||
t.printStackTrace(out);
|
if (infoStream != null)
|
||||||
|
t.printStackTrace(infoStream);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,7 +271,8 @@ public class CheckIndex {
|
||||||
input = dir.openInput(segmentsFileName);
|
input = dir.openInput(segmentsFileName);
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
msg("ERROR: could not open segments file in directory");
|
msg("ERROR: could not open segments file in directory");
|
||||||
t.printStackTrace(out);
|
if (infoStream != null)
|
||||||
|
t.printStackTrace(infoStream);
|
||||||
result.cantOpenSegments = true;
|
result.cantOpenSegments = true;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -98,7 +281,8 @@ public class CheckIndex {
|
||||||
format = input.readInt();
|
format = input.readInt();
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
msg("ERROR: could not read segment file version in directory");
|
msg("ERROR: could not read segment file version in directory");
|
||||||
t.printStackTrace(out);
|
if (infoStream != null)
|
||||||
|
t.printStackTrace(infoStream);
|
||||||
result.missingSegmentVersion = true;
|
result.missingSegmentVersion = true;
|
||||||
return result;
|
return result;
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -138,10 +322,13 @@ public class CheckIndex {
|
||||||
result.segmentFormat = sFormat;
|
result.segmentFormat = sFormat;
|
||||||
|
|
||||||
if (onlySegments != null) {
|
if (onlySegments != null) {
|
||||||
out.print("\nChecking only these segments:");
|
result.partial = true;
|
||||||
|
if (infoStream != null)
|
||||||
|
infoStream.print("\nChecking only these segments:");
|
||||||
Iterator it = onlySegments.iterator();
|
Iterator it = onlySegments.iterator();
|
||||||
while (it.hasNext()) {
|
while (it.hasNext()) {
|
||||||
out.print(" " + it.next());
|
if (infoStream != null)
|
||||||
|
infoStream.print(" " + it.next());
|
||||||
}
|
}
|
||||||
result.segmentsChecked.addAll(onlySegments);
|
result.segmentsChecked.addAll(onlySegments);
|
||||||
msg(":");
|
msg(":");
|
||||||
|
@ -161,7 +348,7 @@ public class CheckIndex {
|
||||||
final SegmentInfo info = sis.info(i);
|
final SegmentInfo info = sis.info(i);
|
||||||
if (onlySegments != null && !onlySegments.contains(info.name))
|
if (onlySegments != null && !onlySegments.contains(info.name))
|
||||||
continue;
|
continue;
|
||||||
CheckIndexStatus.SegmentInfoStatus segInfoStat = new CheckIndexStatus.SegmentInfoStatus();
|
Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
|
||||||
result.segmentInfos.add(segInfoStat);
|
result.segmentInfos.add(segInfoStat);
|
||||||
msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
|
msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
|
||||||
segInfoStat.name = info.name;
|
segInfoStat.name = info.name;
|
||||||
|
@ -200,9 +387,9 @@ public class CheckIndex {
|
||||||
msg(" has deletions [delFileName=" + delFileName + "]");
|
msg(" has deletions [delFileName=" + delFileName + "]");
|
||||||
segInfoStat.hasDeletions = true;
|
segInfoStat.hasDeletions = true;
|
||||||
segInfoStat.deletionsFileName = delFileName;
|
segInfoStat.deletionsFileName = delFileName;
|
||||||
|
|
||||||
}
|
}
|
||||||
out.print(" test: open reader.........");
|
if (infoStream != null)
|
||||||
|
infoStream.print(" test: open reader.........");
|
||||||
reader = SegmentReader.get(info);
|
reader = SegmentReader.get(info);
|
||||||
final int numDocs = reader.numDocs();
|
final int numDocs = reader.numDocs();
|
||||||
toLoseDocCount = numDocs;
|
toLoseDocCount = numDocs;
|
||||||
|
@ -219,7 +406,8 @@ public class CheckIndex {
|
||||||
msg("OK");
|
msg("OK");
|
||||||
}
|
}
|
||||||
|
|
||||||
out.print(" test: fields, norms.......");
|
if (infoStream != null)
|
||||||
|
infoStream.print(" test: fields, norms.......");
|
||||||
Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
|
Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
|
||||||
Iterator it = fieldNames.iterator();
|
Iterator it = fieldNames.iterator();
|
||||||
while(it.hasNext()) {
|
while(it.hasNext()) {
|
||||||
|
@ -231,7 +419,8 @@ public class CheckIndex {
|
||||||
}
|
}
|
||||||
msg("OK [" + fieldNames.size() + " fields]");
|
msg("OK [" + fieldNames.size() + " fields]");
|
||||||
segInfoStat.numFields = fieldNames.size();
|
segInfoStat.numFields = fieldNames.size();
|
||||||
out.print(" test: terms, freq, prox...");
|
if (infoStream != null)
|
||||||
|
infoStream.print(" test: terms, freq, prox...");
|
||||||
final TermEnum termEnum = reader.terms();
|
final TermEnum termEnum = reader.terms();
|
||||||
final TermPositions termPositions = reader.termPositions();
|
final TermPositions termPositions = reader.termPositions();
|
||||||
|
|
||||||
|
@ -288,7 +477,8 @@ public class CheckIndex {
|
||||||
|
|
||||||
msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
|
msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
|
||||||
|
|
||||||
out.print(" test: stored fields.......");
|
if (infoStream != null)
|
||||||
|
infoStream.print(" test: stored fields.......");
|
||||||
int docCount = 0;
|
int docCount = 0;
|
||||||
long totFields = 0;
|
long totFields = 0;
|
||||||
for(int j=0;j<info.docCount;j++)
|
for(int j=0;j<info.docCount;j++)
|
||||||
|
@ -303,7 +493,8 @@ public class CheckIndex {
|
||||||
|
|
||||||
msg("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]");
|
msg("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]");
|
||||||
|
|
||||||
out.print(" test: term vectors........");
|
if (infoStream != null)
|
||||||
|
infoStream.print(" test: term vectors........");
|
||||||
int totVectors = 0;
|
int totVectors = 0;
|
||||||
for(int j=0;j<info.docCount;j++)
|
for(int j=0;j<info.docCount;j++)
|
||||||
if (!reader.isDeleted(j)) {
|
if (!reader.isDeleted(j)) {
|
||||||
|
@ -318,12 +509,10 @@ public class CheckIndex {
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
msg("FAILED");
|
msg("FAILED");
|
||||||
String comment;
|
String comment;
|
||||||
if (doFix)
|
comment = "fixIndex() would remove reference to this segment";
|
||||||
comment = "will remove reference to this segment (-fix is specified)";
|
|
||||||
else
|
|
||||||
comment = "would remove reference to this segment (-fix was not specified)";
|
|
||||||
msg(" WARNING: " + comment + "; full exception:");
|
msg(" WARNING: " + comment + "; full exception:");
|
||||||
t.printStackTrace(out);
|
if (infoStream != null)
|
||||||
|
t.printStackTrace(infoStream);
|
||||||
msg("");
|
msg("");
|
||||||
result.totLoseDocCount += toLoseDocCount;
|
result.totLoseDocCount += toLoseDocCount;
|
||||||
result.numBadSegments++;
|
result.numBadSegments++;
|
||||||
|
@ -346,29 +535,70 @@ public class CheckIndex {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Repairs the index using previously returned result from
|
/** Repairs the index using previously returned result
|
||||||
* {@link #check}. <b>WARNING</b>: this writes a new
|
* from {@link #checkIndex}. Note that this does not
|
||||||
* segments file into the index, effectively removing
|
* remove any of the unreferenced files after it's done;
|
||||||
* all documents in broken segments from the index. BE
|
* you must separately open an {@link IndexWriter}, which
|
||||||
* CAREFUL. */
|
* deletes unreferenced files when it's created.
|
||||||
static public void fix(CheckIndexStatus result) throws IOException {
|
*
|
||||||
|
* <p><b>WARNING</b>: this writes a
|
||||||
|
* new segments file into the index, effectively removing
|
||||||
|
* all documents in broken segments from the index.
|
||||||
|
* BE CAREFUL.
|
||||||
|
*
|
||||||
|
* <p><b>WARNING</b>: Make sure you only call this when the
|
||||||
|
* index is not opened by any writer. */
|
||||||
|
public void fixIndex(Status result) throws IOException {
|
||||||
|
if (result.partial)
|
||||||
|
throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
|
||||||
result.newSegments.commit(result.dir);
|
result.newSegments.commit(result.dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean assertsOn;
|
private static boolean assertsOn;
|
||||||
|
|
||||||
private static boolean testAsserts() {
|
private static boolean testAsserts() {
|
||||||
assertsOn = true;
|
assertsOn = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void msg(String msg) {
|
private static boolean assertsOn() {
|
||||||
if (out != null) {
|
assert testAsserts();
|
||||||
out.println(msg);
|
return assertsOn;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Throwable {
|
/** Command-line interface to check and fix an index.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Run it like this:
|
||||||
|
<pre>
|
||||||
|
java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
|
||||||
|
</pre>
|
||||||
|
<ul>
|
||||||
|
<li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments
|
||||||
|
|
||||||
|
<li><code>-segment X</code>: only check the specified
|
||||||
|
segment(s). This can be specified multiple times,
|
||||||
|
to check more than one segment, eg <code>-segment _2
|
||||||
|
-segment _a</code>. You can't use this with the -fix
|
||||||
|
option.
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
|
||||||
|
documents (perhaps many) to be permanently removed from the index. Always make
|
||||||
|
a backup copy of your index before running this! Do not run this tool on an index
|
||||||
|
that is actively being written to. You have been warned!
|
||||||
|
|
||||||
|
<p> Run without -fix, this tool will open the index, report version information
|
||||||
|
and report any exceptions it hits and what action it would take if -fix were
|
||||||
|
specified. With -fix, this tool will remove any segments that have issues and
|
||||||
|
write a new segments_N file. This means all documents contained in the affected
|
||||||
|
segments will be removed.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
This tool exits with exit code 1 if the index cannot be opened or has any
|
||||||
|
corruption, else 0.
|
||||||
|
*/
|
||||||
|
public static void main(String[] args) throws IOException {
|
||||||
|
|
||||||
boolean doFix = false;
|
boolean doFix = false;
|
||||||
List onlySegments = new ArrayList();
|
List onlySegments = new ArrayList();
|
||||||
|
@ -380,14 +610,14 @@ public class CheckIndex {
|
||||||
i++;
|
i++;
|
||||||
} else if (args[i].equals("-segment")) {
|
} else if (args[i].equals("-segment")) {
|
||||||
if (i == args.length-1) {
|
if (i == args.length-1) {
|
||||||
msg("ERROR: missing name for -segment option");
|
System.out.println("ERROR: missing name for -segment option");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
onlySegments.add(args[i+1]);
|
onlySegments.add(args[i+1]);
|
||||||
i += 2;
|
i += 2;
|
||||||
} else {
|
} else {
|
||||||
if (indexPath != null) {
|
if (indexPath != null) {
|
||||||
msg("ERROR: unexpected extra argument '" + args[i] + "'");
|
System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
indexPath = args[i];
|
indexPath = args[i];
|
||||||
|
@ -396,8 +626,8 @@ public class CheckIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (indexPath == null) {
|
if (indexPath == null) {
|
||||||
msg("\nERROR: index path not specified");
|
System.out.println("\nERROR: index path not specified");
|
||||||
msg("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
|
System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
|
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
|
||||||
" -segment X: only check the specified segments. This can be specified multiple\n" +
|
" -segment X: only check the specified segments. This can be specified multiple\n" +
|
||||||
|
@ -415,40 +645,42 @@ public class CheckIndex {
|
||||||
"write a new segments_N file. This means all documents contained in the affected\n" +
|
"write a new segments_N file. This means all documents contained in the affected\n" +
|
||||||
"segments will be removed.\n" +
|
"segments will be removed.\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"This tool exits with exit code 1 if the index cannot be opened or has has any\n" +
|
"This tool exits with exit code 1 if the index cannot be opened or has any\n" +
|
||||||
"corruption, else 0.\n");
|
"corruption, else 0.\n");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!assertsOn())
|
||||||
|
System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
|
||||||
|
|
||||||
if (onlySegments.size() == 0)
|
if (onlySegments.size() == 0)
|
||||||
onlySegments = null;
|
onlySegments = null;
|
||||||
else if (doFix) {
|
else if (doFix) {
|
||||||
msg("ERROR: cannot specify both -fix and -segment");
|
System.out.println("ERROR: cannot specify both -fix and -segment");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert testAsserts();
|
System.out.println("\nOpening index @ " + indexPath + "\n");
|
||||||
if (!assertsOn)
|
|
||||||
msg("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene', so assertions are enabled");
|
|
||||||
|
|
||||||
msg("\nOpening index @ " + indexPath + "\n");
|
|
||||||
Directory dir = null;
|
Directory dir = null;
|
||||||
try {
|
try {
|
||||||
dir = FSDirectory.getDirectory(indexPath);
|
dir = FSDirectory.getDirectory(indexPath);
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
msg("ERROR: could not open directory \"" + indexPath + "\"; exiting");
|
System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
|
||||||
t.printStackTrace(out);
|
t.printStackTrace(System.out);
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
CheckIndexStatus result = check(dir, doFix, onlySegments);
|
CheckIndex checker = new CheckIndex(dir);
|
||||||
|
checker.setInfoStream(System.out);
|
||||||
|
|
||||||
|
Status result = checker.checkIndex(onlySegments);
|
||||||
|
|
||||||
if (!result.clean) {
|
if (!result.clean) {
|
||||||
if (!doFix){
|
if (!doFix) {
|
||||||
msg("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
|
System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
|
||||||
} else {
|
} else {
|
||||||
msg("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
|
System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
|
||||||
msg("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
|
System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
|
||||||
for(int s=0;s<5;s++) {
|
for(int s=0;s<5;s++) {
|
||||||
try {
|
try {
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
|
@ -457,15 +689,15 @@ public class CheckIndex {
|
||||||
s--;
|
s--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
msg(" " + (5-i) + "...");
|
System.out.println(" " + (5-s) + "...");
|
||||||
}
|
}
|
||||||
msg("Writing...");
|
System.out.println("Writing...");
|
||||||
CheckIndex.fix(result);
|
checker.fixIndex(result);
|
||||||
|
System.out.println("OK");
|
||||||
|
System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
|
||||||
}
|
}
|
||||||
msg("OK");
|
|
||||||
msg("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
|
|
||||||
}
|
}
|
||||||
msg("");
|
System.out.println("");
|
||||||
|
|
||||||
final int exitCode;
|
final int exitCode;
|
||||||
if (result != null && result.clean == true)
|
if (result != null && result.clean == true)
|
||||||
|
|
|
@ -1,74 +0,0 @@
|
||||||
package org.apache.lucene.index;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*
|
|
||||||
**/
|
|
||||||
public class CheckIndexStatus {
|
|
||||||
|
|
||||||
public boolean clean;
|
|
||||||
|
|
||||||
|
|
||||||
public boolean missingSegments;
|
|
||||||
public boolean cantOpenSegments;
|
|
||||||
public boolean missingSegmentVersion;
|
|
||||||
|
|
||||||
|
|
||||||
public String segmentsFileName;
|
|
||||||
public int numSegments;
|
|
||||||
public String segmentFormat;
|
|
||||||
public List/*<String>*/ segmentsChecked = new ArrayList();
|
|
||||||
|
|
||||||
public boolean toolOutOfDate;
|
|
||||||
|
|
||||||
public List/*<SegmentInfoStatus*/ segmentInfos = new ArrayList();
|
|
||||||
public Directory dir;
|
|
||||||
public SegmentInfos newSegments;
|
|
||||||
public int totLoseDocCount;
|
|
||||||
public int numBadSegments;
|
|
||||||
|
|
||||||
public static class SegmentInfoStatus{
|
|
||||||
public String name;
|
|
||||||
public int docCount;
|
|
||||||
public boolean compound;
|
|
||||||
public int numFiles;
|
|
||||||
public double sizeMB;
|
|
||||||
public int docStoreOffset = -1;
|
|
||||||
public String docStoreSegment;
|
|
||||||
public boolean docStoreCompoundFile;
|
|
||||||
|
|
||||||
public boolean hasDeletions;
|
|
||||||
public String deletionsFileName;
|
|
||||||
public int numDeleted;
|
|
||||||
|
|
||||||
public boolean openReaderPassed;
|
|
||||||
|
|
||||||
int numFields;
|
|
||||||
|
|
||||||
public boolean hasProx;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -47,9 +47,9 @@ public class TestCheckIndex extends LuceneTestCase {
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||||
|
CheckIndex checker = new CheckIndex(dir);
|
||||||
CheckIndex.out = new PrintStream(bos);
|
checker.setInfoStream(new PrintStream(bos));
|
||||||
CheckIndexStatus indexStatus = CheckIndex.check(dir, false, null);
|
CheckIndex.Status indexStatus = checker.checkIndex();
|
||||||
if (indexStatus.clean == false) {
|
if (indexStatus.clean == false) {
|
||||||
System.out.println("CheckIndex failed");
|
System.out.println("CheckIndex failed");
|
||||||
System.out.println(bos.toString());
|
System.out.println(bos.toString());
|
||||||
|
@ -57,6 +57,7 @@ public class TestCheckIndex extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
final List onlySegments = new ArrayList();
|
final List onlySegments = new ArrayList();
|
||||||
onlySegments.add("_0");
|
onlySegments.add("_0");
|
||||||
assertTrue(CheckIndex.check(dir, false, onlySegments).clean == true);
|
|
||||||
|
assertTrue(checker.checkIndex(onlySegments).clean == true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.MergeScheduler;
|
import org.apache.lucene.index.MergeScheduler;
|
||||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||||
import org.apache.lucene.index.CheckIndex;
|
import org.apache.lucene.index.CheckIndex;
|
||||||
import org.apache.lucene.index.CheckIndexStatus;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
@ -60,10 +59,10 @@ public class _TestUtil {
|
||||||
* true is returned. */
|
* true is returned. */
|
||||||
public static boolean checkIndex(Directory dir) throws IOException {
|
public static boolean checkIndex(Directory dir) throws IOException {
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||||
CheckIndex.out = new PrintStream(bos);
|
|
||||||
|
|
||||||
//TODO: fix this
|
CheckIndex checker = new CheckIndex(dir);
|
||||||
CheckIndexStatus indexStatus = CheckIndex.check(dir, false, null);
|
checker.setInfoStream(new PrintStream(bos));
|
||||||
|
CheckIndex.Status indexStatus = checker.checkIndex();
|
||||||
if (indexStatus == null || indexStatus.clean == false) {
|
if (indexStatus == null || indexStatus.clean == false) {
|
||||||
System.out.println("CheckIndex failed");
|
System.out.println("CheckIndex failed");
|
||||||
System.out.println(bos.toString());
|
System.out.println(bos.toString());
|
||||||
|
|
Loading…
Reference in New Issue