mirror of https://github.com/apache/lucene.git
LUCENE-1625: return more status details in CheckIndex, broken out by component into separate status classes
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@788800 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2735779bf6
commit
890d53acfb
|
@ -276,6 +276,11 @@ Changes in runtime behavior
|
||||||
26. LUCENE-1703: Add IndexWriter.waitForMerges. (Tim Smith via Mike
|
26. LUCENE-1703: Add IndexWriter.waitForMerges. (Tim Smith via Mike
|
||||||
McCandless)
|
McCandless)
|
||||||
|
|
||||||
|
27. LUCENE-1625: CheckIndex's programmatic API now returns separate
|
||||||
|
classes detailing the status of each component in the index, and
|
||||||
|
includes more detailed status than previously. (Tim Smith via
|
||||||
|
Mike McCandless)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
|
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
|
||||||
|
|
|
@ -179,6 +179,76 @@ public class CheckIndex {
|
||||||
* debugging details that IndexWriter records into
|
* debugging details that IndexWriter records into
|
||||||
* each segment it creates */
|
* each segment it creates */
|
||||||
public Map diagnostics;
|
public Map diagnostics;
|
||||||
|
|
||||||
|
/** Status for testing of field norms (null if field norms could not be tested). */
|
||||||
|
public FieldNormStatus fieldNormStatus;
|
||||||
|
|
||||||
|
/** Status for testing of indexed terms (null if indexed terms could not be tested). */
|
||||||
|
public TermIndexStatus termIndexStatus;
|
||||||
|
|
||||||
|
/** Status for testing of stored fields (null if stored fields could not be tested). */
|
||||||
|
public StoredFieldStatus storedFieldStatus;
|
||||||
|
|
||||||
|
/** Status for testing of term vectors (null if term vectors could not be tested). */
|
||||||
|
public TermVectorStatus termVectorStatus;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Status from testing field norms.
|
||||||
|
*/
|
||||||
|
public static final class FieldNormStatus {
|
||||||
|
/** Number of fields successfully tested */
|
||||||
|
public long totFields = 0L;
|
||||||
|
|
||||||
|
/** Exception thrown during field norms test (null on success) */
|
||||||
|
public Throwable error = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Status from testing term index.
|
||||||
|
*/
|
||||||
|
public static final class TermIndexStatus {
|
||||||
|
/** Total term count */
|
||||||
|
public long termCount = 0L;
|
||||||
|
|
||||||
|
/** Total frequency across all terms. */
|
||||||
|
public long totFreq = 0L;
|
||||||
|
|
||||||
|
/** Total number of positions. */
|
||||||
|
public long totPos = 0L;
|
||||||
|
|
||||||
|
/** Exception thrown during term index test (null on success) */
|
||||||
|
public Throwable error = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Status from testing stored fields.
|
||||||
|
*/
|
||||||
|
public static final class StoredFieldStatus {
|
||||||
|
|
||||||
|
/** Number of documents tested. */
|
||||||
|
public int docCount = 0;
|
||||||
|
|
||||||
|
/** Total number of stored fields tested. */
|
||||||
|
public long totFields = 0;
|
||||||
|
|
||||||
|
/** Exception thrown during stored fields test (null on success) */
|
||||||
|
public Throwable error = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Status from testing term vectors.
|
||||||
|
*/
|
||||||
|
public static final class TermVectorStatus {
|
||||||
|
|
||||||
|
/** Number of documents tested. */
|
||||||
|
public int docCount = 0;
|
||||||
|
|
||||||
|
/** Total number of term vectors tested. */
|
||||||
|
public long totVectors = 0;
|
||||||
|
|
||||||
|
/** Exception thrown during term vector test (null on success) */
|
||||||
|
public Throwable error = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -443,107 +513,38 @@ public class CheckIndex {
|
||||||
if (reader.maxDoc() != info.docCount)
|
if (reader.maxDoc() != info.docCount)
|
||||||
throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);
|
throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);
|
||||||
|
|
||||||
if (infoStream != null)
|
// Test getFieldNames()
|
||||||
infoStream.print(" test: fields, norms.......");
|
if (infoStream != null) {
|
||||||
Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
|
infoStream.print(" test: fields..............");
|
||||||
Iterator it = fieldNames.iterator();
|
|
||||||
final byte[] b = new byte[reader.maxDoc()];
|
|
||||||
while(it.hasNext()) {
|
|
||||||
final String fieldName = (String) it.next();
|
|
||||||
reader.norms(fieldName, b, 0);
|
|
||||||
}
|
}
|
||||||
|
Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
|
||||||
msg("OK [" + fieldNames.size() + " fields]");
|
msg("OK [" + fieldNames.size() + " fields]");
|
||||||
segInfoStat.numFields = fieldNames.size();
|
segInfoStat.numFields = fieldNames.size();
|
||||||
if (infoStream != null)
|
|
||||||
infoStream.print(" test: terms, freq, prox...");
|
|
||||||
final TermEnum termEnum = reader.terms();
|
|
||||||
final TermPositions termPositions = reader.termPositions();
|
|
||||||
|
|
||||||
// Used only to count up # deleted docs for this
|
// Test Field Norms
|
||||||
// term
|
segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);
|
||||||
final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
|
|
||||||
|
|
||||||
long termCount = 0;
|
// Test the Term Index
|
||||||
long totFreq = 0;
|
segInfoStat.termIndexStatus = testTermIndex(info, reader);
|
||||||
long totPos = 0;
|
|
||||||
final int maxDoc = reader.maxDoc();
|
|
||||||
|
|
||||||
while(termEnum.next()) {
|
// Test Stored Fields
|
||||||
termCount++;
|
segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
|
||||||
final Term term = termEnum.term();
|
|
||||||
final int docFreq = termEnum.docFreq();
|
|
||||||
termPositions.seek(term);
|
|
||||||
int lastDoc = -1;
|
|
||||||
int freq0 = 0;
|
|
||||||
totFreq += docFreq;
|
|
||||||
while(termPositions.next()) {
|
|
||||||
freq0++;
|
|
||||||
final int doc = termPositions.doc();
|
|
||||||
final int freq = termPositions.freq();
|
|
||||||
if (doc <= lastDoc)
|
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
|
|
||||||
if (doc >= maxDoc)
|
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
|
|
||||||
|
|
||||||
lastDoc = doc;
|
// Test Term Vectors
|
||||||
if (freq <= 0)
|
segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
|
|
||||||
|
|
||||||
int lastPos = -1;
|
// Rethrow the first exception we encountered
|
||||||
totPos += freq;
|
// This will cause stats for failed segments to be incremented properly
|
||||||
for(int j=0;j<freq;j++) {
|
if (segInfoStat.fieldNormStatus.error != null) {
|
||||||
final int pos = termPositions.nextPosition();
|
throw new RuntimeException("Field Norm test failed");
|
||||||
if (pos < -1)
|
} else if (segInfoStat.termIndexStatus.error != null) {
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
|
throw new RuntimeException("Term Index test failed");
|
||||||
if (pos < lastPos)
|
} else if (segInfoStat.storedFieldStatus.error != null) {
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
|
throw new RuntimeException("Stored Field test failed");
|
||||||
}
|
} else if (segInfoStat.termVectorStatus.error != null) {
|
||||||
|
throw new RuntimeException("Term Vector test failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now count how many deleted docs occurred in
|
|
||||||
// this term:
|
|
||||||
final int delCount;
|
|
||||||
if (reader.hasDeletions()) {
|
|
||||||
myTermDocs.seek(term);
|
|
||||||
while(myTermDocs.next()) {
|
|
||||||
}
|
|
||||||
delCount = myTermDocs.delCount;
|
|
||||||
} else
|
|
||||||
delCount = 0;
|
|
||||||
|
|
||||||
if (freq0 + delCount != docFreq)
|
|
||||||
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
|
|
||||||
|
|
||||||
if (infoStream != null)
|
|
||||||
infoStream.print(" test: stored fields.......");
|
|
||||||
int docCount = 0;
|
|
||||||
long totFields = 0;
|
|
||||||
for(int j=0;j<info.docCount;j++)
|
|
||||||
if (!reader.isDeleted(j)) {
|
|
||||||
docCount++;
|
|
||||||
Document doc = reader.document(j);
|
|
||||||
totFields += doc.getFields().size();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (docCount != reader.numDocs())
|
|
||||||
throw new RuntimeException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
|
|
||||||
|
|
||||||
msg("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]");
|
|
||||||
|
|
||||||
if (infoStream != null)
|
|
||||||
infoStream.print(" test: term vectors........");
|
|
||||||
int totVectors = 0;
|
|
||||||
for(int j=0;j<info.docCount;j++)
|
|
||||||
if (!reader.isDeleted(j)) {
|
|
||||||
TermFreqVector[] tfv = reader.getTermFreqVectors(j);
|
|
||||||
if (tfv != null)
|
|
||||||
totVectors += tfv.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
msg("OK [" + totVectors + " total vector count; avg " + nf.format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]");
|
|
||||||
msg("");
|
msg("");
|
||||||
|
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
|
@ -575,6 +576,190 @@ public class CheckIndex {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test field norms.
|
||||||
|
*/
|
||||||
|
private Status.FieldNormStatus testFieldNorms(Collection fieldNames, SegmentReader reader) {
|
||||||
|
final Status.FieldNormStatus status = new Status.FieldNormStatus();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Test Field Norms
|
||||||
|
if (infoStream != null) {
|
||||||
|
infoStream.print(" test: field norms.........");
|
||||||
|
}
|
||||||
|
Iterator it = fieldNames.iterator();
|
||||||
|
final byte[] b = new byte[reader.maxDoc()];
|
||||||
|
while (it.hasNext()) {
|
||||||
|
final String fieldName = (String) it.next();
|
||||||
|
reader.norms(fieldName, b, 0);
|
||||||
|
++status.totFields;
|
||||||
|
}
|
||||||
|
|
||||||
|
msg("OK [" + status.totFields + " fields]");
|
||||||
|
} catch (Throwable e) {
|
||||||
|
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||||
|
status.error = e;
|
||||||
|
if (infoStream != null) {
|
||||||
|
e.printStackTrace(infoStream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the term index.
|
||||||
|
*/
|
||||||
|
private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
|
||||||
|
final Status.TermIndexStatus status = new Status.TermIndexStatus();
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (infoStream != null) {
|
||||||
|
infoStream.print(" test: terms, freq, prox...");
|
||||||
|
}
|
||||||
|
|
||||||
|
final TermEnum termEnum = reader.terms();
|
||||||
|
final TermPositions termPositions = reader.termPositions();
|
||||||
|
|
||||||
|
// Used only to count up # deleted docs for this term
|
||||||
|
final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
|
||||||
|
|
||||||
|
final int maxDoc = reader.maxDoc();
|
||||||
|
|
||||||
|
while (termEnum.next()) {
|
||||||
|
status.termCount++;
|
||||||
|
final Term term = termEnum.term();
|
||||||
|
final int docFreq = termEnum.docFreq();
|
||||||
|
termPositions.seek(term);
|
||||||
|
int lastDoc = -1;
|
||||||
|
int freq0 = 0;
|
||||||
|
status.totFreq += docFreq;
|
||||||
|
while (termPositions.next()) {
|
||||||
|
freq0++;
|
||||||
|
final int doc = termPositions.doc();
|
||||||
|
final int freq = termPositions.freq();
|
||||||
|
if (doc <= lastDoc)
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
|
||||||
|
if (doc >= maxDoc)
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
|
||||||
|
|
||||||
|
lastDoc = doc;
|
||||||
|
if (freq <= 0)
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
|
||||||
|
|
||||||
|
int lastPos = -1;
|
||||||
|
status.totPos += freq;
|
||||||
|
for(int j=0;j<freq;j++) {
|
||||||
|
final int pos = termPositions.nextPosition();
|
||||||
|
if (pos < -1)
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
|
||||||
|
if (pos < lastPos)
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now count how many deleted docs occurred in
|
||||||
|
// this term:
|
||||||
|
final int delCount;
|
||||||
|
if (reader.hasDeletions()) {
|
||||||
|
myTermDocs.seek(term);
|
||||||
|
while(myTermDocs.next()) { }
|
||||||
|
delCount = myTermDocs.delCount;
|
||||||
|
} else {
|
||||||
|
delCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (freq0 + delCount != docFreq) {
|
||||||
|
throw new RuntimeException("term " + term + " docFreq=" +
|
||||||
|
docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
|
||||||
|
|
||||||
|
} catch (Throwable e) {
|
||||||
|
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||||
|
status.error = e;
|
||||||
|
if (infoStream != null) {
|
||||||
|
e.printStackTrace(infoStream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test stored fields for a segment.
|
||||||
|
*/
|
||||||
|
private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) {
|
||||||
|
final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (infoStream != null) {
|
||||||
|
infoStream.print(" test: stored fields.......");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan stored fields for all documents
|
||||||
|
for (int j = 0; j < info.docCount; ++j) {
|
||||||
|
if (!reader.isDeleted(j)) {
|
||||||
|
status.docCount++;
|
||||||
|
Document doc = reader.document(j);
|
||||||
|
status.totFields += doc.getFields().size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate docCount
|
||||||
|
if (status.docCount != reader.numDocs()) {
|
||||||
|
throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
|
||||||
|
}
|
||||||
|
|
||||||
|
msg("OK [" + status.totFields + " total field count; avg " +
|
||||||
|
format.format((((float) status.totFields)/status.docCount)) + " fields per doc]");
|
||||||
|
} catch (Throwable e) {
|
||||||
|
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||||
|
status.error = e;
|
||||||
|
if (infoStream != null) {
|
||||||
|
e.printStackTrace(infoStream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test term vectors for a segment.
|
||||||
|
*/
|
||||||
|
private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
|
||||||
|
final Status.TermVectorStatus status = new Status.TermVectorStatus();
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (infoStream != null) {
|
||||||
|
infoStream.print(" test: term vectors........");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 0; j < info.docCount; ++j) {
|
||||||
|
if (!reader.isDeleted(j)) {
|
||||||
|
status.docCount++;
|
||||||
|
TermFreqVector[] tfv = reader.getTermFreqVectors(j);
|
||||||
|
if (tfv != null) {
|
||||||
|
status.totVectors += tfv.length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
msg("OK [" + status.totVectors + " total vector count; avg " +
|
||||||
|
format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
|
||||||
|
} catch (Throwable e) {
|
||||||
|
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||||
|
status.error = e;
|
||||||
|
if (infoStream != null) {
|
||||||
|
e.printStackTrace(infoStream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
/** Repairs the index using previously returned result
|
/** Repairs the index using previously returned result
|
||||||
* from {@link #checkIndex}. Note that this does not
|
* from {@link #checkIndex}. Note that this does not
|
||||||
* remove any of the unreferenced files after it's done;
|
* remove any of the unreferenced files after it's done;
|
||||||
|
|
|
@ -50,6 +50,7 @@ public class TestCheckIndex extends LuceneTestCase {
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||||
CheckIndex checker = new CheckIndex(dir);
|
CheckIndex checker = new CheckIndex(dir);
|
||||||
checker.setInfoStream(new PrintStream(bos));
|
checker.setInfoStream(new PrintStream(bos));
|
||||||
|
//checker.setInfoStream(System.out);
|
||||||
CheckIndex.Status indexStatus = checker.checkIndex();
|
CheckIndex.Status indexStatus = checker.checkIndex();
|
||||||
if (indexStatus.clean == false) {
|
if (indexStatus.clean == false) {
|
||||||
System.out.println("CheckIndex failed");
|
System.out.println("CheckIndex failed");
|
||||||
|
@ -61,6 +62,27 @@ public class TestCheckIndex extends LuceneTestCase {
|
||||||
assertTrue(seg.openReaderPassed);
|
assertTrue(seg.openReaderPassed);
|
||||||
|
|
||||||
assertNotNull(seg.diagnostics);
|
assertNotNull(seg.diagnostics);
|
||||||
|
|
||||||
|
assertNotNull(seg.fieldNormStatus);
|
||||||
|
assertNull(seg.fieldNormStatus.error);
|
||||||
|
assertEquals(1, seg.fieldNormStatus.totFields);
|
||||||
|
|
||||||
|
assertNotNull(seg.termIndexStatus);
|
||||||
|
assertNull(seg.termIndexStatus.error);
|
||||||
|
assertEquals(1, seg.termIndexStatus.termCount);
|
||||||
|
assertEquals(19, seg.termIndexStatus.totFreq);
|
||||||
|
assertEquals(18, seg.termIndexStatus.totPos);
|
||||||
|
|
||||||
|
assertNotNull(seg.storedFieldStatus);
|
||||||
|
assertNull(seg.storedFieldStatus.error);
|
||||||
|
assertEquals(18, seg.storedFieldStatus.docCount);
|
||||||
|
assertEquals(18, seg.storedFieldStatus.totFields);
|
||||||
|
|
||||||
|
assertNotNull(seg.termVectorStatus);
|
||||||
|
assertNull(seg.termVectorStatus.error);
|
||||||
|
assertEquals(18, seg.termVectorStatus.docCount);
|
||||||
|
assertEquals(18, seg.termVectorStatus.totVectors);
|
||||||
|
|
||||||
assertTrue(seg.diagnostics.size() > 0);
|
assertTrue(seg.diagnostics.size() > 0);
|
||||||
final List onlySegments = new ArrayList();
|
final List onlySegments = new ArrayList();
|
||||||
onlySegments.add("_0");
|
onlySegments.add("_0");
|
||||||
|
|
Loading…
Reference in New Issue