LUCENE-4248: add producer assertions to Codec API / fix producer inconsistencies

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364763 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-07-23 19:26:00 +00:00
parent 32a0f402d6
commit c0f8cd69a8
7 changed files with 95 additions and 11 deletions

View File

@ -896,7 +896,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// w.close(); // w.close();
// } // }
} else { } else {
assert sumTotalTermFreq == 0; assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
assert sumDocFreq == 0; assert sumDocFreq == 0;
assert docCount == 0; assert docCount == 0;
} }

View File

@ -146,6 +146,6 @@ public abstract class PostingsConsumer {
df++; df++;
} }
} }
return new TermStats(df, totTF); return new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF);
} }
} }

View File

@ -205,6 +205,6 @@ public abstract class TermsConsumer {
} }
} }
} }
finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality()); finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
} }
} }

View File

@ -542,11 +542,11 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
} }
postingsConsumer.finishDoc(); postingsConsumer.finishDoc();
} }
termsConsumer.finishTerm(text, new TermStats(numDocs, totTF)); termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
sumTotalTermFreq += totTF; sumTotalTermFreq += totTF;
sumDocFreq += numDocs; sumDocFreq += numDocs;
} }
termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality()); termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
} }
} }

View File

@ -116,7 +116,7 @@ public class TestCodecs extends LuceneTestCase {
sumDF += term.docs.length; sumDF += term.docs.length;
sumTotalTermCount += term.write(termsConsumer); sumTotalTermCount += term.write(termsConsumer);
} }
termsConsumer.finish(sumTotalTermCount, sumDF, (int) visitedDocs.cardinality()); termsConsumer.finish(omitTF ? -1 : sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
} }
} }
@ -168,7 +168,7 @@ public class TestCodecs extends LuceneTestCase {
postingsConsumer.finishDoc(); postingsConsumer.finishDoc();
} }
} }
termsConsumer.finishTerm(text, new TermStats(docs.length, totTF)); termsConsumer.finishTerm(text, new TermStats(docs.length, field.omitTF ? -1 : totTF));
return totTF; return totTF;
} }
} }

View File

@ -428,12 +428,12 @@ public class TestPostingsFormat extends LuceneTestCase {
postingsConsumer.finishDoc(); postingsConsumer.finishDoc();
docCount++; docCount++;
} }
termsConsumer.finishTerm(term, new TermStats(postings.size(), totalTF)); termsConsumer.finishTerm(term, new TermStats(postings.size(), doFreq ? totalTF : -1));
sumTotalTF += totalTF; sumTotalTF += totalTF;
sumDF += postings.size(); sumDF += postings.size();
} }
termsConsumer.finish(sumTotalTF, sumDF, seenDocs.cardinality()); termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
} }
fieldsConsumer.close(); fieldsConsumer.close();

View File

@ -18,16 +18,23 @@ package org.apache.lucene.codecs.asserting;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.AssertingAtomicReader; import org.apache.lucene.index.AssertingAtomicReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
/** /**
* Just like {@link Lucene40PostingsFormat} but with additional asserts. * Just like {@link Lucene40PostingsFormat} but with additional asserts.
@ -39,10 +46,9 @@ public class AssertingPostingsFormat extends PostingsFormat {
super("Asserting"); super("Asserting");
} }
// TODO: we could add some useful checks here?
@Override @Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return in.fieldsConsumer(state); return new AssertingFieldsConsumer(in.fieldsConsumer(state));
} }
@Override @Override
@ -85,4 +91,82 @@ public class AssertingPostingsFormat extends PostingsFormat {
return in.getUniqueTermCount(); return in.getUniqueTermCount();
} }
} }
/**
 * {@link FieldsConsumer} decorator: every {@link TermsConsumer} obtained from the
 * wrapped consumer is checked for non-null and then wrapped in an
 * {@link AssertingTermsConsumer} before being handed to the caller.
 */
static class AssertingFieldsConsumer extends FieldsConsumer {
  /** The real consumer that all calls are forwarded to. */
  private final FieldsConsumer delegate;

  /**
   * @param in the consumer to wrap
   */
  AssertingFieldsConsumer(FieldsConsumer in) {
    delegate = in;
  }

  /**
   * Forwards to the wrapped consumer and decorates its result.
   *
   * @param field the field being added
   * @return an asserting wrapper around the wrapped consumer's {@link TermsConsumer}
   */
  @Override
  public TermsConsumer addField(FieldInfo field) throws IOException {
    final TermsConsumer wrapped = delegate.addField(field);
    // The wrapped consumer must hand back a usable terms consumer.
    assert wrapped != null;
    return new AssertingTermsConsumer(wrapped, field);
  }

  /** Closes the wrapped consumer. */
  @Override
  public void close() throws IOException {
    delegate.close();
  }
}
/**
 * Call-sequence states tracked by {@link AssertingTermsConsumer}.
 * (A nested enum is implicitly static, so no modifier is needed.)
 */
enum TermsConsumerState {
  /** No term is open: the starting state, and the state after {@code finishTerm}. */
  INITIAL,
  /** {@code startTerm} has been called. */
  START,
  /** {@code finish} has been called. */
  FINISHED
}
/**
 * {@link TermsConsumer} decorator that asserts the calling sequence and the
 * reported statistics before forwarding each call to the wrapped consumer.
 *
 * <p>State transitions enforced here: {@code startTerm} moves to
 * {@link TermsConsumerState#START}, {@code finishTerm} moves back to
 * {@link TermsConsumerState#INITIAL}, and {@code finish} moves to
 * {@link TermsConsumerState#FINISHED}.
 */
static class AssertingTermsConsumer extends TermsConsumer {
  /** The real consumer that all calls are forwarded to. */
  private final TermsConsumer delegate;
  /** Field whose index options decide which statistics checks apply. */
  private final FieldInfo fieldInfo;
  /** Private copy of the term most recently passed to {@code startTerm}; null before the first one. */
  private BytesRef previousTerm;
  private TermsConsumerState state = TermsConsumerState.INITIAL;

  /**
   * @param in the consumer to wrap
   * @param fieldInfo the field this consumer receives terms for
   */
  AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) {
    this.delegate = in;
    this.fieldInfo = fieldInfo;
  }

  // TODO: AssertingPostingsConsumer

  /**
   * Asserts that the consumer is not finished and that {@code text} sorts strictly
   * after the previously started term, then forwards to the wrapped consumer.
   */
  @Override
  public PostingsConsumer startTerm(BytesRef text) throws IOException {
    // TODO: assert that if state == START (no finishTerm called), that no actual docs were fed.
    // TODO: this makes the api really confusing! we should try to clean this up!
    assert notYetFinished();
    state = TermsConsumerState.START;
    assert sortsAfterPreviousTerm(text);
    // Keep our own copy: the caller may reuse the BytesRef instance.
    previousTerm = BytesRef.deepCopyOf(text);
    return delegate.startTerm(text);
  }

  /**
   * Asserts that a term is open, that {@code text} is that term, and that the
   * statistics are plausible, then forwards to the wrapped consumer.
   */
  @Override
  public void finishTerm(BytesRef text, TermStats stats) throws IOException {
    assert state == TermsConsumerState.START;
    state = TermsConsumerState.INITIAL;
    assert text.equals(previousTerm);
    assert stats.docFreq > 0; // otherwise, this method should not be called.
    final boolean docsOnly = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
    // Fields indexed with DOCS_ONLY must report totalTermFreq as -1.
    assert !docsOnly || stats.totalTermFreq == -1;
    delegate.finishTerm(text, stats);
  }

  /**
   * Asserts that the consumer is not already finished and that the field-level
   * statistics are consistent, then forwards to the wrapped consumer.
   */
  @Override
  public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
    // TODO: assert that if state == START (no finishTerm called), that no actual docs were fed.
    // TODO: this makes the api really confusing! we should try to clean this up!
    assert notYetFinished();
    state = TermsConsumerState.FINISHED;
    assert docCount >= 0;
    assert sumDocFreq >= docCount;
    final boolean docsOnly = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
    // DOCS_ONLY fields report -1; otherwise the sum must be at least sumDocFreq.
    assert (docsOnly ? sumTotalTermFreq == -1 : sumTotalTermFreq >= sumDocFreq);
    delegate.finish(sumTotalTermFreq, sumDocFreq, docCount);
  }

  /** Returns the wrapped consumer's comparator. */
  @Override
  public Comparator<BytesRef> getComparator() throws IOException {
    return delegate.getComparator();
  }

  /** Assertion predicate: true while in the INITIAL or START state. */
  private boolean notYetFinished() {
    return state == TermsConsumerState.INITIAL || state == TermsConsumerState.START;
  }

  /**
   * Assertion predicate: true if no term has been started yet, or if {@code text}
   * compares greater than the previous term under the wrapped consumer's comparator.
   */
  private boolean sortsAfterPreviousTerm(BytesRef text) throws IOException {
    return previousTerm == null || delegate.getComparator().compare(text, previousTerm) > 0;
  }
}
} }