From 168e73bca7817c08590203fd57fbb9ebfe513575 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 31 May 2012 17:32:11 +0000 Subject: [PATCH] LUCENE-4096: impossible to checkindex if you use norms other than byte[] git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1344826 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/CheckIndex.java | 52 ++++++++++++------- .../apache/lucene/index/TestCustomNorms.java | 6 --- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 7f94e6ea662..d3183fb5797 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -34,8 +34,6 @@ import org.apache.lucene.document.FieldType; // for javadocs import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IOContext; @@ -669,7 +667,7 @@ public class CheckIndex { * checks Fields api is consistent with itself. * searcher is optional, to verify with queries. Can be null. */ - private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, IndexSearcher searcher, boolean doPrint) throws IOException { + private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint) throws IOException { // TODO: we should probably return our own stats thing...?! final Status.TermIndexStatus status = new Status.TermIndexStatus(); @@ -1006,8 +1004,14 @@ public class CheckIndex { throw new RuntimeException("seek to last term " + lastTerm + " failed"); } - if (searcher != null) { - searcher.search(new TermQuery(new Term(field, lastTerm)), 1); + int expectedDocFreq = termsEnum.docFreq(); + DocsEnum d = termsEnum.docs(null, null, false); + int docFreq = 0; + while (d.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + docFreq++; + } + if (docFreq != expectedDocFreq) { + throw new RuntimeException("docFreq for last term " + lastTerm + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq); } } @@ -1052,16 +1056,30 @@ public class CheckIndex { } } - // TermQuery - if (searcher != null) { - long totDocCount2 = 0; - for(int i=0;i totDocCountNoDeletes) { + throw new RuntimeException("more postings with deletes=" + totDocCount + " than without=" + totDocCountNoDeletes); + } + + if (totDocCountNoDeletes != totDocFreq) { + throw new RuntimeException("docfreqs=" + totDocFreq + " != recomputed docfreqs=" + totDocCountNoDeletes); } } } @@ -1119,7 +1137,6 @@ public class CheckIndex { Status.TermIndexStatus status; final int maxDoc = reader.maxDoc(); final Bits liveDocs = reader.getLiveDocs(); - final IndexSearcher is = new IndexSearcher(reader); try { if (infoStream != null) { @@ -1127,13 +1144,12 @@ public class CheckIndex { } final Fields fields = reader.fields(); - status = checkFields(fields, liveDocs, maxDoc, fieldInfos, is, true); + status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true); if (liveDocs != null) { if (infoStream != null) { infoStream.print(" test (ignoring deletes): terms, freq, prox..."); } - // TODO: can we make a IS that ignores all deletes? - checkFields(fields, null, maxDoc, fieldInfos, null, true); + checkFields(fields, null, maxDoc, fieldInfos, true); } } catch (Throwable e) { msg("ERROR: " + e); @@ -1350,10 +1366,10 @@ public class CheckIndex { if (tfv != null) { // First run with no deletions: - checkFields(tfv, null, 1, fieldInfos, null, false); + checkFields(tfv, null, 1, fieldInfos, false); // Again, with the one doc deleted: - checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, null, false); + checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false); // Only agg stats if the doc is live: final boolean doStats = liveDocs == null || liveDocs.get(j); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java b/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java index 244340c9be1..d79643d0274 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java @@ -32,13 +32,10 @@ import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; /** * */ -// TODO: what is the problem with SimpleText -@SuppressCodecs("SimpleText") public class TestCustomNorms extends LuceneTestCase { final String floatTestField = "normsTestFloat"; final String exceptionTestField = "normsTestExcp"; @@ -46,8 +43,6 @@ public class TestCustomNorms extends LuceneTestCase { public void testFloatNorms() throws IOException { MockDirectoryWrapper dir = newDirectory(); - // TODO: what is the checkindex problem? - dir.setCheckIndexOnClose(false); // can't set sim to checkindex yet IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); Similarity provider = new MySimProvider(); @@ -89,7 +84,6 @@ public class TestCustomNorms extends LuceneTestCase { public void testExceptionOnRandomType() throws IOException { MockDirectoryWrapper dir = newDirectory(); - dir.setCheckIndexOnClose(false); // can't set sim to checkindex yet IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); Similarity provider = new MySimProvider();