mirror of https://github.com/apache/lucene.git
LUCENE-4485: CheckIndex's terms, terms/docs pairs counts don't include deleted docs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399028 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a839ab7fb0
commit
cd6c2fe0a2
|
@ -57,6 +57,9 @@ Bug Fixes
|
|||
* LUCENE-4468: Fix rareish integer overflows in Block and Lucene40 postings
|
||||
formats (Robert Muir)
|
||||
|
||||
* LUCENE-4485: When CheckIndex terms, terms/docs pairs and tokens,
|
||||
these counts now all exclude deleted documents. (Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets
|
||||
|
|
|
@ -233,9 +233,12 @@ public class CheckIndex {
|
|||
TermIndexStatus() {
|
||||
}
|
||||
|
||||
/** Total term count */
|
||||
/** Number of terms with at least one live doc. */
|
||||
public long termCount = 0L;
|
||||
|
||||
/** Number of terms with zero live docs docs. */
|
||||
public long delTermCount = 0L;
|
||||
|
||||
/** Total frequency across all terms. */
|
||||
public long totFreq = 0L;
|
||||
|
||||
|
@ -750,7 +753,7 @@ public class CheckIndex {
|
|||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
|
||||
boolean hasOrd = true;
|
||||
final long termCountStart = status.termCount;
|
||||
final long termCountStart = status.delTermCount + status.termCount;
|
||||
|
||||
BytesRef lastTerm = null;
|
||||
|
||||
|
@ -781,7 +784,6 @@ public class CheckIndex {
|
|||
if (docFreq <= 0) {
|
||||
throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
|
||||
}
|
||||
status.totFreq += docFreq;
|
||||
sumDocFreq += docFreq;
|
||||
|
||||
docs = termsEnum.docs(liveDocs, docs);
|
||||
|
@ -796,15 +798,13 @@ public class CheckIndex {
|
|||
}
|
||||
|
||||
if (hasOrd) {
|
||||
final long ordExpected = status.termCount - termCountStart;
|
||||
final long ordExpected = status.delTermCount + status.termCount - termCountStart;
|
||||
if (ord != ordExpected) {
|
||||
throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
status.termCount++;
|
||||
|
||||
final DocsEnum docs2;
|
||||
if (postings != null) {
|
||||
docs2 = postings;
|
||||
|
@ -820,6 +820,7 @@ public class CheckIndex {
|
|||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
status.totFreq++;
|
||||
visitedDocs.set(doc);
|
||||
int freq = -1;
|
||||
if (hasFreqs) {
|
||||
|
@ -883,6 +884,12 @@ public class CheckIndex {
|
|||
}
|
||||
}
|
||||
|
||||
if (docCount != 0) {
|
||||
status.termCount++;
|
||||
} else {
|
||||
status.delTermCount++;
|
||||
}
|
||||
|
||||
final long totalTermFreq2 = termsEnum.totalTermFreq();
|
||||
final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1;
|
||||
|
||||
|
@ -1063,11 +1070,11 @@ public class CheckIndex {
|
|||
// check unique term count
|
||||
long termCount = -1;
|
||||
|
||||
if (status.termCount-termCountStart > 0) {
|
||||
if ((status.delTermCount+status.termCount)-termCountStart > 0) {
|
||||
termCount = fields.terms(field).size();
|
||||
|
||||
if (termCount != -1 && termCount != status.termCount - termCountStart) {
|
||||
throw new RuntimeException("termCount mismatch " + termCount + " vs " + (status.termCount - termCountStart));
|
||||
if (termCount != -1 && termCount != status.delTermCount + status.termCount - termCountStart) {
|
||||
throw new RuntimeException("termCount mismatch " + (status.delTermCount + termCount) + " vs " + (status.termCount - termCountStart));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -75,8 +75,8 @@ public class TestCheckIndex extends LuceneTestCase {
|
|||
|
||||
assertNotNull(seg.termIndexStatus);
|
||||
assertNull(seg.termIndexStatus.error);
|
||||
assertEquals(19, seg.termIndexStatus.termCount);
|
||||
assertEquals(19, seg.termIndexStatus.totFreq);
|
||||
assertEquals(18, seg.termIndexStatus.termCount);
|
||||
assertEquals(18, seg.termIndexStatus.totFreq);
|
||||
assertEquals(18, seg.termIndexStatus.totPos);
|
||||
|
||||
assertNotNull(seg.storedFieldStatus);
|
||||
|
|
Loading…
Reference in New Issue