LUCENE-6320: speed up checkindex

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1663505 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2015-03-03 01:18:33 +00:00
parent 99fd468624
commit 1510f69303
4 changed files with 310 additions and 252 deletions

View File

@ -131,6 +131,8 @@ Optimizations
* LUCENE-6318: Reduce RAM usage of FieldInfos when there are many fields. * LUCENE-6318: Reduce RAM usage of FieldInfos when there are many fields.
(Mike McCandless, Robert Muir) (Mike McCandless, Robert Muir)
* LUCENE-6320: Speed up CheckIndex. (Robert Muir)
API Changes API Changes
* LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files() * LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()

View File

@ -31,7 +31,12 @@ import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus; import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.AlreadyClosedException;
@ -174,7 +179,7 @@ public class CheckIndex implements Closeable {
/** Current deletions generation. */ /** Current deletions generation. */
public long deletionsGen; public long deletionsGen;
/** True if we were able to open an LeafReader on this /** True if we were able to open a CodecReader on this
* segment. */ * segment. */
public boolean openReaderPassed; public boolean openReaderPassed;
@ -755,7 +760,7 @@ public class CheckIndex implements Closeable {
* Test live docs. * Test live docs.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.LiveDocStatus testLiveDocs(LeafReader reader, PrintStream infoStream, boolean failFast) throws IOException { public static Status.LiveDocStatus testLiveDocs(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime(); long startNS = System.nanoTime();
final Status.LiveDocStatus status = new Status.LiveDocStatus(); final Status.LiveDocStatus status = new Status.LiveDocStatus();
@ -812,7 +817,7 @@ public class CheckIndex implements Closeable {
* Test field infos. * Test field infos.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.FieldInfoStatus testFieldInfos(LeafReader reader, PrintStream infoStream, boolean failFast) throws IOException { public static Status.FieldInfoStatus testFieldInfos(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime(); long startNS = System.nanoTime();
final Status.FieldInfoStatus status = new Status.FieldInfoStatus(); final Status.FieldInfoStatus status = new Status.FieldInfoStatus();
@ -845,7 +850,7 @@ public class CheckIndex implements Closeable {
* Test field norms. * Test field norms.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.FieldNormStatus testFieldNorms(LeafReader reader, PrintStream infoStream, boolean failFast) throws IOException { public static Status.FieldNormStatus testFieldNorms(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime(); long startNS = System.nanoTime();
final Status.FieldNormStatus status = new Status.FieldNormStatus(); final Status.FieldNormStatus status = new Status.FieldNormStatus();
@ -854,14 +859,14 @@ public class CheckIndex implements Closeable {
if (infoStream != null) { if (infoStream != null) {
infoStream.print(" test: field norms........."); infoStream.print(" test: field norms.........");
} }
NormsProducer normsReader = reader.getNormsReader();
if (normsReader != null) {
normsReader = normsReader.getMergeInstance();
}
for (FieldInfo info : reader.getFieldInfos()) { for (FieldInfo info : reader.getFieldInfos()) {
if (info.hasNorms()) { if (info.hasNorms()) {
checkNorms(info, reader, infoStream); checkNumericDocValues(info.name, reader.maxDoc(), normsReader.getNorms(info), new Bits.MatchAllBits(reader.maxDoc()));
++status.totFields; ++status.totFields;
} else {
if (reader.getNormValues(info.name) != null) {
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
} }
} }
@ -897,7 +902,6 @@ public class CheckIndex implements Closeable {
int computedFieldCount = 0; int computedFieldCount = 0;
PostingsEnum docs = null; PostingsEnum docs = null;
PostingsEnum docsAndFreqs = null;
PostingsEnum postings = null; PostingsEnum postings = null;
String lastField = null; String lastField = null;
@ -1174,20 +1178,20 @@ public class CheckIndex implements Closeable {
// Re-count if there are deleted docs: // Re-count if there are deleted docs:
if (liveDocs != null) { if (liveDocs != null) {
if (hasFreqs) { if (hasFreqs) {
final PostingsEnum docsNoDel = termsEnum.postings(null, docsAndFreqs); docs = termsEnum.postings(null, docs);
docCount = 0; docCount = 0;
totalTermFreq = 0; totalTermFreq = 0;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
visitedDocs.set(docsNoDel.docID()); visitedDocs.set(docs.docID());
docCount++; docCount++;
totalTermFreq += docsNoDel.freq(); totalTermFreq += docs.freq();
} }
} else { } else {
final PostingsEnum docsNoDel = termsEnum.postings(null, docs, PostingsEnum.NONE); docs = termsEnum.postings(null, docs, PostingsEnum.NONE);
docCount = 0; docCount = 0;
totalTermFreq = -1; totalTermFreq = -1;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
visitedDocs.set(docsNoDel.docID()); visitedDocs.set(docs.docID());
docCount++; docCount++;
} }
} }
@ -1457,7 +1461,7 @@ public class CheckIndex implements Closeable {
* Test the term index. * Test the term index.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.TermIndexStatus testPostings(LeafReader reader, PrintStream infoStream) throws IOException { public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream) throws IOException {
return testPostings(reader, infoStream, false, false); return testPostings(reader, infoStream, false, false);
} }
@ -1465,7 +1469,7 @@ public class CheckIndex implements Closeable {
* Test the term index. * Test the term index.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.TermIndexStatus testPostings(LeafReader reader, PrintStream infoStream, boolean verbose, boolean failFast) throws IOException { public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream, boolean verbose, boolean failFast) throws IOException {
// TODO: we should go and verify term vectors match, if // TODO: we should go and verify term vectors match, if
// crossCheckTermVectors is on... // crossCheckTermVectors is on...
@ -1479,7 +1483,7 @@ public class CheckIndex implements Closeable {
infoStream.print(" test: terms, freq, prox..."); infoStream.print(" test: terms, freq, prox...");
} }
final Fields fields = reader.fields(); final Fields fields = reader.getPostingsReader().getMergeInstance();
final FieldInfos fieldInfos = reader.getFieldInfos(); final FieldInfos fieldInfos = reader.getFieldInfos();
status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true, false, infoStream, verbose); status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true, false, infoStream, verbose);
if (liveDocs != null) { if (liveDocs != null) {
@ -1507,7 +1511,7 @@ public class CheckIndex implements Closeable {
* Test stored fields. * Test stored fields.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.StoredFieldStatus testStoredFields(LeafReader reader, PrintStream infoStream, boolean failFast) throws IOException { public static Status.StoredFieldStatus testStoredFields(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime(); long startNS = System.nanoTime();
final Status.StoredFieldStatus status = new Status.StoredFieldStatus(); final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
@ -1518,10 +1522,13 @@ public class CheckIndex implements Closeable {
// Scan stored fields for all documents // Scan stored fields for all documents
final Bits liveDocs = reader.getLiveDocs(); final Bits liveDocs = reader.getLiveDocs();
StoredFieldsReader storedFields = reader.getFieldsReader().getMergeInstance();
for (int j = 0; j < reader.maxDoc(); ++j) { for (int j = 0; j < reader.maxDoc(); ++j) {
// Intentionally pull even deleted documents to // Intentionally pull even deleted documents to
// make sure they too are not corrupt: // make sure they too are not corrupt:
StoredDocument doc = reader.document(j); DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
storedFields.visitDocument(j, visitor);
StoredDocument doc = visitor.getDocument();
if (liveDocs == null || liveDocs.get(j)) { if (liveDocs == null || liveDocs.get(j)) {
status.docCount++; status.docCount++;
status.totFields += doc.getFields().size(); status.totFields += doc.getFields().size();
@ -1555,7 +1562,7 @@ public class CheckIndex implements Closeable {
* Test docvalues. * Test docvalues.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.DocValuesStatus testDocValues(LeafReader reader, public static Status.DocValuesStatus testDocValues(CodecReader reader,
PrintStream infoStream, PrintStream infoStream,
boolean failFast) throws IOException { boolean failFast) throws IOException {
long startNS = System.nanoTime(); long startNS = System.nanoTime();
@ -1564,18 +1571,14 @@ public class CheckIndex implements Closeable {
if (infoStream != null) { if (infoStream != null) {
infoStream.print(" test: docvalues..........."); infoStream.print(" test: docvalues...........");
} }
DocValuesProducer dvReader = reader.getDocValuesReader();
if (dvReader != null) {
dvReader = dvReader.getMergeInstance();
}
for (FieldInfo fieldInfo : reader.getFieldInfos()) { for (FieldInfo fieldInfo : reader.getFieldInfos()) {
if (fieldInfo.getDocValuesType() != DocValuesType.NONE) { if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
status.totalValueFields++; status.totalValueFields++;
checkDocValues(fieldInfo, reader, infoStream, status); checkDocValues(fieldInfo, dvReader, reader.maxDoc(), infoStream, status);
} else {
if (reader.getBinaryDocValues(fieldInfo.name) != null ||
reader.getNumericDocValues(fieldInfo.name) != null ||
reader.getSortedDocValues(fieldInfo.name) != null ||
reader.getSortedSetDocValues(fieldInfo.name) != null ||
reader.getDocsWithField(fieldInfo.name) != null) {
throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!");
}
} }
} }
@ -1601,8 +1604,8 @@ public class CheckIndex implements Closeable {
return status; return status;
} }
private static void checkBinaryDocValues(String fieldName, LeafReader reader, BinaryDocValues dv, Bits docsWithField) { private static void checkBinaryDocValues(String fieldName, int maxDoc, BinaryDocValues dv, Bits docsWithField) {
for (int i = 0; i < reader.maxDoc(); i++) { for (int i = 0; i < maxDoc; i++) {
final BytesRef term = dv.get(i); final BytesRef term = dv.get(i);
assert term.isValid(); assert term.isValid();
if (docsWithField.get(i) == false && term.length > 0) { if (docsWithField.get(i) == false && term.length > 0) {
@ -1611,12 +1614,12 @@ public class CheckIndex implements Closeable {
} }
} }
private static void checkSortedDocValues(String fieldName, LeafReader reader, SortedDocValues dv, Bits docsWithField) { private static void checkSortedDocValues(String fieldName, int maxDoc, SortedDocValues dv, Bits docsWithField) {
checkBinaryDocValues(fieldName, reader, dv, docsWithField); checkBinaryDocValues(fieldName, maxDoc, dv, docsWithField);
final int maxOrd = dv.getValueCount()-1; final int maxOrd = dv.getValueCount()-1;
FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount()); FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount());
int maxOrd2 = -1; int maxOrd2 = -1;
for (int i = 0; i < reader.maxDoc(); i++) { for (int i = 0; i < maxDoc; i++) {
int ord = dv.getOrd(i); int ord = dv.getOrd(i);
if (ord == -1) { if (ord == -1) {
if (docsWithField.get(i)) { if (docsWithField.get(i)) {
@ -1651,11 +1654,11 @@ public class CheckIndex implements Closeable {
} }
} }
private static void checkSortedSetDocValues(String fieldName, LeafReader reader, SortedSetDocValues dv, Bits docsWithField) { private static void checkSortedSetDocValues(String fieldName, int maxDoc, SortedSetDocValues dv, Bits docsWithField) {
final long maxOrd = dv.getValueCount()-1; final long maxOrd = dv.getValueCount()-1;
LongBitSet seenOrds = new LongBitSet(dv.getValueCount()); LongBitSet seenOrds = new LongBitSet(dv.getValueCount());
long maxOrd2 = -1; long maxOrd2 = -1;
for (int i = 0; i < reader.maxDoc(); i++) { for (int i = 0; i < maxDoc; i++) {
dv.setDocument(i); dv.setDocument(i);
long lastOrd = -1; long lastOrd = -1;
long ord; long ord;
@ -1721,8 +1724,8 @@ public class CheckIndex implements Closeable {
} }
} }
private static void checkSortedNumericDocValues(String fieldName, LeafReader reader, SortedNumericDocValues ndv, Bits docsWithField) { private static void checkSortedNumericDocValues(String fieldName, int maxDoc, SortedNumericDocValues ndv, Bits docsWithField) {
for (int i = 0; i < reader.maxDoc(); i++) { for (int i = 0; i < maxDoc; i++) {
ndv.setDocument(i); ndv.setDocument(i);
int count = ndv.count(); int count = ndv.count();
if (docsWithField.get(i)) { if (docsWithField.get(i)) {
@ -1745,8 +1748,8 @@ public class CheckIndex implements Closeable {
} }
} }
private static void checkNumericDocValues(String fieldName, LeafReader reader, NumericDocValues ndv, Bits docsWithField) { private static void checkNumericDocValues(String fieldName, int maxDoc, NumericDocValues ndv, Bits docsWithField) {
for (int i = 0; i < reader.maxDoc(); i++) { for (int i = 0; i < maxDoc; i++) {
long value = ndv.get(i); long value = ndv.get(i);
if (docsWithField.get(i) == false && value != 0) { if (docsWithField.get(i) == false && value != 0) {
throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i); throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i);
@ -1754,80 +1757,44 @@ public class CheckIndex implements Closeable {
} }
} }
private static void checkDocValues(FieldInfo fi, LeafReader reader, PrintStream infoStream, DocValuesStatus status) throws Exception { private static void checkDocValues(FieldInfo fi, DocValuesProducer dvReader, int maxDoc, PrintStream infoStream, DocValuesStatus status) throws Exception {
Bits docsWithField = reader.getDocsWithField(fi.name); Bits docsWithField = dvReader.getDocsWithField(fi);
if (docsWithField == null) { if (docsWithField == null) {
throw new RuntimeException(fi.name + " docsWithField does not exist"); throw new RuntimeException(fi.name + " docsWithField does not exist");
} else if (docsWithField.length() != reader.maxDoc()) { } else if (docsWithField.length() != maxDoc) {
throw new RuntimeException(fi.name + " docsWithField has incorrect length: " + docsWithField.length() + ",expected: " + reader.maxDoc()); throw new RuntimeException(fi.name + " docsWithField has incorrect length: " + docsWithField.length() + ",expected: " + maxDoc);
} }
switch(fi.getDocValuesType()) { switch(fi.getDocValuesType()) {
case SORTED: case SORTED:
status.totalSortedFields++; status.totalSortedFields++;
checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name), docsWithField); checkSortedDocValues(fi.name, maxDoc, dvReader.getSorted(fi), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
reader.getSortedNumericDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
throw new RuntimeException(fi.name + " returns multiple docvalues types!");
}
break; break;
case SORTED_NUMERIC: case SORTED_NUMERIC:
status.totalSortedNumericFields++; status.totalSortedNumericFields++;
checkSortedNumericDocValues(fi.name, reader, reader.getSortedNumericDocValues(fi.name), docsWithField); checkSortedNumericDocValues(fi.name, maxDoc, dvReader.getSortedNumeric(fi), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null) {
throw new RuntimeException(fi.name + " returns multiple docvalues types!");
}
break; break;
case SORTED_SET: case SORTED_SET:
status.totalSortedSetFields++; status.totalSortedSetFields++;
checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name), docsWithField); checkSortedSetDocValues(fi.name, maxDoc, dvReader.getSortedSet(fi), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
reader.getSortedNumericDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null) {
throw new RuntimeException(fi.name + " returns multiple docvalues types!");
}
break; break;
case BINARY: case BINARY:
status.totalBinaryFields++; status.totalBinaryFields++;
checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name), docsWithField); checkBinaryDocValues(fi.name, maxDoc, dvReader.getBinary(fi), docsWithField);
if (reader.getNumericDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null ||
reader.getSortedNumericDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
throw new RuntimeException(fi.name + " returns multiple docvalues types!");
}
break; break;
case NUMERIC: case NUMERIC:
status.totalNumericFields++; status.totalNumericFields++;
checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name), docsWithField); checkNumericDocValues(fi.name, maxDoc, dvReader.getNumeric(fi), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null ||
reader.getSortedNumericDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
throw new RuntimeException(fi.name + " returns multiple docvalues types!");
}
break; break;
default: default:
throw new AssertionError(); throw new AssertionError();
} }
} }
private static void checkNorms(FieldInfo fi, LeafReader reader, PrintStream infoStream) throws IOException {
if (fi.hasNorms()) {
checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name), new Bits.MatchAllBits(reader.maxDoc()));
}
}
/** /**
* Test term vectors. * Test term vectors.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.TermVectorStatus testTermVectors(LeafReader reader, PrintStream infoStream) throws IOException { public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream) throws IOException {
return testTermVectors(reader, infoStream, false, false, false); return testTermVectors(reader, infoStream, false, false, false);
} }
@ -1835,7 +1802,7 @@ public class CheckIndex implements Closeable {
* Test term vectors. * Test term vectors.
* @lucene.experimental * @lucene.experimental
*/ */
public static Status.TermVectorStatus testTermVectors(LeafReader reader, PrintStream infoStream, boolean verbose, boolean crossCheckTermVectors, boolean failFast) throws IOException { public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream, boolean verbose, boolean crossCheckTermVectors, boolean failFast) throws IOException {
long startNS = System.nanoTime(); long startNS = System.nanoTime();
final Status.TermVectorStatus status = new Status.TermVectorStatus(); final Status.TermVectorStatus status = new Status.TermVectorStatus();
final FieldInfos fieldInfos = reader.getFieldInfos(); final FieldInfos fieldInfos = reader.getFieldInfos();
@ -1858,7 +1825,7 @@ public class CheckIndex implements Closeable {
final Fields postingsFields; final Fields postingsFields;
// TODO: testTermsIndex // TODO: testTermsIndex
if (crossCheckTermVectors) { if (crossCheckTermVectors) {
postingsFields = reader.fields(); postingsFields = reader.getPostingsReader().getMergeInstance();
} else { } else {
postingsFields = null; postingsFields = null;
} }
@ -1866,11 +1833,15 @@ public class CheckIndex implements Closeable {
TermsEnum termsEnum = null; TermsEnum termsEnum = null;
TermsEnum postingsTermsEnum = null; TermsEnum postingsTermsEnum = null;
TermVectorsReader vectorsReader = reader.getTermVectorsReader();
if (vectorsReader != null) {
vectorsReader = vectorsReader.getMergeInstance();
for (int j = 0; j < reader.maxDoc(); ++j) { for (int j = 0; j < reader.maxDoc(); ++j) {
// Intentionally pull/visit (but don't count in // Intentionally pull/visit (but don't count in
// stats) deleted documents to make sure they too // stats) deleted documents to make sure they too
// are not corrupt: // are not corrupt:
Fields tfv = reader.getTermVectors(j); Fields tfv = vectorsReader.get(j);
// TODO: can we make a IS(FIR) that searches just // TODO: can we make a IS(FIR) that searches just
// this term vector... to pass for searcher? // this term vector... to pass for searcher?
@ -2047,6 +2018,7 @@ public class CheckIndex implements Closeable {
} }
} }
} }
}
float vectorAvg = status.docCount == 0 ? 0 : status.totVectors / (float)status.docCount; float vectorAvg = status.docCount == 0 ? 0 : status.totVectors / (float)status.docCount;
msg(infoStream, String.format(Locale.ROOT, "OK [%d total term vector count; avg %.1f term/freq vector fields per doc] [took %.3f sec]", msg(infoStream, String.format(Locale.ROOT, "OK [%d total term vector count; avg %.1f term/freq vector fields per doc] [took %.3f sec]",
status.totVectors, vectorAvg, nsToSec(System.nanoTime() - startNS))); status.totVectors, vectorAvg, nsToSec(System.nanoTime() - startNS)));

View File

@ -45,6 +45,7 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
@ -2674,9 +2675,12 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8); PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8);
startingGun.await(); startingGun.await();
for (LeafReaderContext leaf : r.leaves()) { for (LeafReaderContext leaf : r.leaves()) {
CheckIndex.testDocValues(leaf.reader(), infoStream, true); DocValuesStatus status = CheckIndex.testDocValues((SegmentReader)leaf.reader(), infoStream, true);
if (status.error != null) {
throw status.error;
} }
} catch (Exception e) { }
} catch (Throwable e) {
throw new RuntimeException(); throw new RuntimeException();
} }
} }

View File

@ -73,6 +73,7 @@ import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.FilterLeafReader;
@ -301,14 +302,23 @@ public final class TestUtil {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8); PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8);
final CodecReader codecReader;
if (reader instanceof CodecReader) {
codecReader = (CodecReader) reader;
reader.checkIntegrity(); reader.checkIntegrity();
CheckIndex.testLiveDocs(reader, infoStream, true); } else {
CheckIndex.testFieldInfos(reader, infoStream, true); codecReader = SlowCodecReaderWrapper.wrap(reader);
CheckIndex.testFieldNorms(reader, infoStream, true); }
CheckIndex.testPostings(reader, infoStream, false, true); CheckIndex.testLiveDocs(codecReader, infoStream, true);
CheckIndex.testStoredFields(reader, infoStream, true); CheckIndex.testFieldInfos(codecReader, infoStream, true);
CheckIndex.testTermVectors(reader, infoStream, false, crossCheckTermVectors, true); CheckIndex.testFieldNorms(codecReader, infoStream, true);
CheckIndex.testDocValues(reader, infoStream, true); CheckIndex.testPostings(codecReader, infoStream, false, true);
CheckIndex.testStoredFields(codecReader, infoStream, true);
CheckIndex.testTermVectors(codecReader, infoStream, false, crossCheckTermVectors, true);
CheckIndex.testDocValues(codecReader, infoStream, true);
// some checks really against the reader API
checkReaderSanity(reader);
if (LuceneTestCase.INFOSTREAM) { if (LuceneTestCase.INFOSTREAM) {
System.out.println(bos.toString(IOUtils.UTF_8)); System.out.println(bos.toString(IOUtils.UTF_8));
@ -325,6 +335,76 @@ public final class TestUtil {
} }
} }
/**
 * Sanity-checks a {@link LeafReader} implementation rather than the index itself.
 * Used by TestUtil.checkReader to check some things really unrelated to the index,
 * just looking for bugs in IndexReader implementations: for every field, the
 * reader's per-field accessors must agree with what the field's {@link FieldInfo}
 * declares.
 *
 * @param reader the reader whose norms and doc values accessors are probed
 * @throws IOException if one of the reader accessors called here throws it
 * @throws RuntimeException if the reader returns norms for a field that has none,
 *         doc values for a field that has none, or more than one doc values type
 *         for the same field
 */
private static void checkReaderSanity(LeafReader reader) throws IOException {
for (FieldInfo info : reader.getFieldInfos()) {
// reader shouldn't return normValues if the field does not have them
if (!info.hasNorms()) {
if (reader.getNormValues(info.name) != null) {
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
}
// reader shouldn't return docValues if the field does not have them
// reader shouldn't return multiple docvalues types for the same field.
// Each typed case below probes the four accessors other than the field's own
// declared type; any non-null result means the reader exposes an extra type.
switch(info.getDocValuesType()) {
case NONE:
// No doc values declared: the accessors probed here, including docsWithField,
// must all return null.
// NOTE(review): getSortedNumericDocValues is not probed in this branch, unlike
// in the typed cases below; confirm whether that omission is intentional.
if (reader.getBinaryDocValues(info.name) != null ||
reader.getNumericDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null ||
reader.getDocsWithField(info.name) != null) {
throw new RuntimeException("field: " + info.name + " has docvalues but should omit them!");
}
break;
case SORTED:
// Declared SORTED: binary, numeric, sorted-numeric and sorted-set must be absent.
if (reader.getBinaryDocValues(info.name) != null ||
reader.getNumericDocValues(info.name) != null ||
reader.getSortedNumericDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
case SORTED_NUMERIC:
// Declared SORTED_NUMERIC: binary, numeric, sorted-set and sorted must be absent.
if (reader.getBinaryDocValues(info.name) != null ||
reader.getNumericDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
case SORTED_SET:
// Declared SORTED_SET: binary, numeric, sorted-numeric and sorted must be absent.
if (reader.getBinaryDocValues(info.name) != null ||
reader.getNumericDocValues(info.name) != null ||
reader.getSortedNumericDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
case BINARY:
// Declared BINARY: numeric, sorted, sorted-numeric and sorted-set must be absent.
if (reader.getNumericDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null ||
reader.getSortedNumericDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
case NUMERIC:
// Declared NUMERIC: binary, sorted, sorted-numeric and sorted-set must be absent.
if (reader.getBinaryDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null ||
reader.getSortedNumericDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
default:
// Any doc values type not handled above fails loudly instead of being skipped.
throw new AssertionError();
}
}
}
/** start and end are BOTH inclusive */ /** start and end are BOTH inclusive */
public static int nextInt(Random r, int start, int end) { public static int nextInt(Random r, int start, int end) {
return RandomInts.randomIntBetween(r, start, end); return RandomInts.randomIntBetween(r, start, end);