LUCENE-4248: add producer assertions to Codec API / fix producer inconsistencies

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364763 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-07-23 19:26:00 +00:00
parent 32a0f402d6
commit c0f8cd69a8
7 changed files with 95 additions and 11 deletions

View File

@ -896,7 +896,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// w.close();
// }
} else {
assert sumTotalTermFreq == 0;
assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
assert sumDocFreq == 0;
assert docCount == 0;
}

View File

@ -146,6 +146,6 @@ public abstract class PostingsConsumer {
df++;
}
}
return new TermStats(df, totTF);
return new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF);
}
}

View File

@ -205,6 +205,6 @@ public abstract class TermsConsumer {
}
}
}
finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
}
}

View File

@ -542,11 +542,11 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
}
postingsConsumer.finishDoc();
}
termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
sumTotalTermFreq += totTF;
sumDocFreq += numDocs;
}
termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
}
}

View File

@ -116,7 +116,7 @@ public class TestCodecs extends LuceneTestCase {
sumDF += term.docs.length;
sumTotalTermCount += term.write(termsConsumer);
}
termsConsumer.finish(sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
termsConsumer.finish(omitTF ? -1 : sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
}
}
@ -168,7 +168,7 @@ public class TestCodecs extends LuceneTestCase {
postingsConsumer.finishDoc();
}
}
termsConsumer.finishTerm(text, new TermStats(docs.length, totTF));
termsConsumer.finishTerm(text, new TermStats(docs.length, field.omitTF ? -1 : totTF));
return totTF;
}
}

View File

@ -428,12 +428,12 @@ public class TestPostingsFormat extends LuceneTestCase {
postingsConsumer.finishDoc();
docCount++;
}
termsConsumer.finishTerm(term, new TermStats(postings.size(), totalTF));
termsConsumer.finishTerm(term, new TermStats(postings.size(), doFreq ? totalTF : -1));
sumTotalTF += totalTF;
sumDF += postings.size();
}
termsConsumer.finish(sumTotalTF, sumDF, seenDocs.cardinality());
termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
}
fieldsConsumer.close();

View File

@ -18,16 +18,23 @@ package org.apache.lucene.codecs.asserting;
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.AssertingAtomicReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
/**
* Just like {@link Lucene40PostingsFormat} but with additional asserts.
@ -39,10 +46,9 @@ public class AssertingPostingsFormat extends PostingsFormat {
super("Asserting");
}
// TODO: we could add some useful checks here?
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return in.fieldsConsumer(state);
return new AssertingFieldsConsumer(in.fieldsConsumer(state));
}
@Override
@ -85,4 +91,82 @@ public class AssertingPostingsFormat extends PostingsFormat {
return in.getUniqueTermCount();
}
}
/**
 * {@link FieldsConsumer} decorator: forwards every call to the wrapped consumer,
 * asserts that the wrapped consumer never returns a null {@link TermsConsumer},
 * and hands each per-field consumer back wrapped in an {@link AssertingTermsConsumer}.
 */
static class AssertingFieldsConsumer extends FieldsConsumer {
  /** The wrapped consumer that performs the real work. */
  private final FieldsConsumer delegate;

  /**
   * @param in the consumer to wrap; all calls are forwarded to it
   */
  AssertingFieldsConsumer(FieldsConsumer in) {
    this.delegate = in;
  }

  /**
   * Asks the wrapped consumer for the field's terms consumer and returns it
   * wrapped with assertion checks bound to the same {@code field}.
   */
  @Override
  public TermsConsumer addField(FieldInfo field) throws IOException {
    final TermsConsumer wrapped = delegate.addField(field);
    assert wrapped != null;
    return new AssertingTermsConsumer(wrapped, field);
  }

  /** Closes the wrapped consumer. */
  @Override
  public void close() throws IOException {
    delegate.close();
  }
}
/**
 * Call-sequence states tracked by {@link AssertingTermsConsumer}.
 * Nested enums are implicitly static, so no explicit modifier is needed.
 */
enum TermsConsumerState {
  /** No term is open: the starting state, and the state again after {@code finishTerm}. */
  INITIAL,
  /** {@code startTerm} has been called and the matching {@code finishTerm} has not. */
  START,
  /** {@code finish} has been called. */
  FINISHED
}
/**
 * {@link TermsConsumer} decorator that checks the caller's use of the API with
 * {@code assert} statements before forwarding each call to the wrapped consumer.
 * It tracks the call sequence via {@link TermsConsumerState} and remembers the
 * most recently started term so that term order and start/finish pairing can be checked.
 */
static class AssertingTermsConsumer extends TermsConsumer {
  /** The wrapped consumer that performs the real work. */
  private final TermsConsumer delegate;
  /** Field being written; its index options decide which statistics checks apply. */
  private final FieldInfo fieldInfo;
  /** Deep copy of the term passed to the latest {@code startTerm}, or null before the first one. */
  private BytesRef lastTerm = null;
  /** Where we are in the startTerm / finishTerm / finish sequence. */
  private TermsConsumerState state = TermsConsumerState.INITIAL;

  /**
   * @param in the consumer to wrap
   * @param fieldInfo the field whose terms are being consumed
   */
  AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) {
    this.delegate = in;
    this.fieldInfo = fieldInfo;
  }

  // TODO: AssertingPostingsConsumer

  /**
   * Checks that the consumer is not finished and that {@code text} sorts strictly
   * after the previously started term (per the wrapped consumer's comparator),
   * then forwards the call.
   */
  @Override
  public PostingsConsumer startTerm(BytesRef text) throws IOException {
    // TODO: assert that if state == START (no finishTerm called), that no actual docs were fed.
    // TODO: this makes the api really confusing! we should try to clean this up!
    // Only three states exist, so "not FINISHED" means INITIAL or START.
    assert state != TermsConsumerState.FINISHED;
    state = TermsConsumerState.START;
    if (lastTerm != null) {
      assert delegate.getComparator().compare(text, lastTerm) > 0;
    }
    // Copy the bytes: the field keeps its own snapshot instead of the caller's instance.
    lastTerm = BytesRef.deepCopyOf(text);
    return delegate.startTerm(text);
  }

  /**
   * Checks that a term is open, that {@code text} equals the term that was started,
   * and that the statistics are plausible, then forwards the call.
   */
  @Override
  public void finishTerm(BytesRef text, TermStats stats) throws IOException {
    assert state == TermsConsumerState.START;
    state = TermsConsumerState.INITIAL;
    assert text.equals(lastTerm);
    assert stats.docFreq > 0; // otherwise, this method should not be called.
    final boolean docsOnly = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
    // DOCS_ONLY fields must report -1 as their total term frequency.
    assert !docsOnly || stats.totalTermFreq == -1;
    delegate.finishTerm(text, stats);
  }

  /**
   * Checks that the consumer is not already finished and that the field-level
   * statistics are mutually consistent, then forwards the call.
   */
  @Override
  public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
    // TODO: assert that if state == START (no finishTerm called), that no actual docs were fed.
    // TODO: this makes the api really confusing! we should try to clean this up!
    assert state != TermsConsumerState.FINISHED;
    state = TermsConsumerState.FINISHED;
    assert docCount >= 0;
    assert sumDocFreq >= docCount;
    final boolean docsOnly = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
    // DOCS_ONLY fields must report -1; otherwise the sum may not be below sumDocFreq.
    assert docsOnly ? sumTotalTermFreq == -1 : sumTotalTermFreq >= sumDocFreq;
    delegate.finish(sumTotalTermFreq, sumDocFreq, docCount);
  }

  /** Returns the wrapped consumer's comparator. */
  @Override
  public Comparator<BytesRef> getComparator() throws IOException {
    return delegate.getComparator();
  }
}
}