LUCENE-4096: CheckIndex cannot check an index that uses norms of a type other than byte[]

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1344826 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-05-31 17:32:11 +00:00
parent ae25687579
commit 168e73bca7
2 changed files with 34 additions and 24 deletions

View File

@ -34,8 +34,6 @@ import org.apache.lucene.document.FieldType; // for javadocs
import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
@ -669,7 +667,7 @@ public class CheckIndex {
* checks Fields api is consistent with itself. * checks Fields api is consistent with itself.
* searcher is optional, to verify with queries. Can be null. * searcher is optional, to verify with queries. Can be null.
*/ */
private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, IndexSearcher searcher, boolean doPrint) throws IOException { private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint) throws IOException {
// TODO: we should probably return our own stats thing...?! // TODO: we should probably return our own stats thing...?!
final Status.TermIndexStatus status = new Status.TermIndexStatus(); final Status.TermIndexStatus status = new Status.TermIndexStatus();
@ -1006,8 +1004,14 @@ public class CheckIndex {
throw new RuntimeException("seek to last term " + lastTerm + " failed"); throw new RuntimeException("seek to last term " + lastTerm + " failed");
} }
if (searcher != null) { int expectedDocFreq = termsEnum.docFreq();
searcher.search(new TermQuery(new Term(field, lastTerm)), 1); DocsEnum d = termsEnum.docs(null, null, false);
int docFreq = 0;
while (d.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
docFreq++;
}
if (docFreq != expectedDocFreq) {
throw new RuntimeException("docFreq for last term " + lastTerm + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq);
} }
} }
@ -1052,16 +1056,30 @@ public class CheckIndex {
} }
} }
// TermQuery long totDocCountNoDeletes = 0;
if (searcher != null) { long totDocFreq = 0;
long totDocCount2 = 0;
for(int i=0;i<seekCount;i++) { for(int i=0;i<seekCount;i++) {
totDocCount2 += searcher.search(new TermQuery(new Term(field, seekTerms[i])), 1).totalHits; if (!termsEnum.seekExact(seekTerms[i], true)) {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
} }
if (totDocCount != totDocCount2) { totDocFreq += termsEnum.docFreq();
throw new RuntimeException("search to seek terms produced wrong number of hits: " + totDocCount + " vs " + totDocCount2); docs = termsEnum.docs(null, docs, false);
if (docs == null) {
throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
} }
while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
totDocCountNoDeletes++;
}
}
if (totDocCount > totDocCountNoDeletes) {
throw new RuntimeException("more postings with deletes=" + totDocCount + " than without=" + totDocCountNoDeletes);
}
if (totDocCountNoDeletes != totDocFreq) {
throw new RuntimeException("docfreqs=" + totDocFreq + " != recomputed docfreqs=" + totDocCountNoDeletes);
} }
} }
} }
@ -1119,7 +1137,6 @@ public class CheckIndex {
Status.TermIndexStatus status; Status.TermIndexStatus status;
final int maxDoc = reader.maxDoc(); final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs(); final Bits liveDocs = reader.getLiveDocs();
final IndexSearcher is = new IndexSearcher(reader);
try { try {
if (infoStream != null) { if (infoStream != null) {
@ -1127,13 +1144,12 @@ public class CheckIndex {
} }
final Fields fields = reader.fields(); final Fields fields = reader.fields();
status = checkFields(fields, liveDocs, maxDoc, fieldInfos, is, true); status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true);
if (liveDocs != null) { if (liveDocs != null) {
if (infoStream != null) { if (infoStream != null) {
infoStream.print(" test (ignoring deletes): terms, freq, prox..."); infoStream.print(" test (ignoring deletes): terms, freq, prox...");
} }
// TODO: can we make a IS that ignores all deletes? checkFields(fields, null, maxDoc, fieldInfos, true);
checkFields(fields, null, maxDoc, fieldInfos, null, true);
} }
} catch (Throwable e) { } catch (Throwable e) {
msg("ERROR: " + e); msg("ERROR: " + e);
@ -1350,10 +1366,10 @@ public class CheckIndex {
if (tfv != null) { if (tfv != null) {
// First run with no deletions: // First run with no deletions:
checkFields(tfv, null, 1, fieldInfos, null, false); checkFields(tfv, null, 1, fieldInfos, false);
// Again, with the one doc deleted: // Again, with the one doc deleted:
checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, null, false); checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false);
// Only agg stats if the doc is live: // Only agg stats if the doc is live:
final boolean doStats = liveDocs == null || liveDocs.get(j); final boolean doStats = liveDocs == null || liveDocs.get(j);

View File

@ -32,13 +32,10 @@ import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
/** /**
* *
*/ */
// TODO: what is the problem with SimpleText
@SuppressCodecs("SimpleText")
public class TestCustomNorms extends LuceneTestCase { public class TestCustomNorms extends LuceneTestCase {
final String floatTestField = "normsTestFloat"; final String floatTestField = "normsTestFloat";
final String exceptionTestField = "normsTestExcp"; final String exceptionTestField = "normsTestExcp";
@ -46,8 +43,6 @@ public class TestCustomNorms extends LuceneTestCase {
public void testFloatNorms() throws IOException { public void testFloatNorms() throws IOException {
MockDirectoryWrapper dir = newDirectory(); MockDirectoryWrapper dir = newDirectory();
// TODO: what is the checkindex problem?
dir.setCheckIndexOnClose(false); // can't set sim to checkindex yet
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())); new MockAnalyzer(random()));
Similarity provider = new MySimProvider(); Similarity provider = new MySimProvider();
@ -89,7 +84,6 @@ public class TestCustomNorms extends LuceneTestCase {
public void testExceptionOnRandomType() throws IOException { public void testExceptionOnRandomType() throws IOException {
MockDirectoryWrapper dir = newDirectory(); MockDirectoryWrapper dir = newDirectory();
dir.setCheckIndexOnClose(false); // can't set sim to checkindex yet
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())); new MockAnalyzer(random()));
Similarity provider = new MySimProvider(); Similarity provider = new MySimProvider();