mirror of https://github.com/apache/lucene.git
LUCENE-4248: add producer assertions to Codec API / fix producer inconsistencies
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364763 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
32a0f402d6
commit
c0f8cd69a8
|
@ -896,7 +896,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
|
||||||
// w.close();
|
// w.close();
|
||||||
// }
|
// }
|
||||||
} else {
|
} else {
|
||||||
assert sumTotalTermFreq == 0;
|
assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
|
||||||
assert sumDocFreq == 0;
|
assert sumDocFreq == 0;
|
||||||
assert docCount == 0;
|
assert docCount == 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -146,6 +146,6 @@ public abstract class PostingsConsumer {
|
||||||
df++;
|
df++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new TermStats(df, totTF);
|
return new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -205,6 +205,6 @@ public abstract class TermsConsumer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
|
finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -542,11 +542,11 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
}
|
}
|
||||||
postingsConsumer.finishDoc();
|
postingsConsumer.finishDoc();
|
||||||
}
|
}
|
||||||
termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
|
termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
|
||||||
sumTotalTermFreq += totTF;
|
sumTotalTermFreq += totTF;
|
||||||
sumDocFreq += numDocs;
|
sumDocFreq += numDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
|
termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,7 +116,7 @@ public class TestCodecs extends LuceneTestCase {
|
||||||
sumDF += term.docs.length;
|
sumDF += term.docs.length;
|
||||||
sumTotalTermCount += term.write(termsConsumer);
|
sumTotalTermCount += term.write(termsConsumer);
|
||||||
}
|
}
|
||||||
termsConsumer.finish(sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
|
termsConsumer.finish(omitTF ? -1 : sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,7 +168,7 @@ public class TestCodecs extends LuceneTestCase {
|
||||||
postingsConsumer.finishDoc();
|
postingsConsumer.finishDoc();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
termsConsumer.finishTerm(text, new TermStats(docs.length, totTF));
|
termsConsumer.finishTerm(text, new TermStats(docs.length, field.omitTF ? -1 : totTF));
|
||||||
return totTF;
|
return totTF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -428,12 +428,12 @@ public class TestPostingsFormat extends LuceneTestCase {
|
||||||
postingsConsumer.finishDoc();
|
postingsConsumer.finishDoc();
|
||||||
docCount++;
|
docCount++;
|
||||||
}
|
}
|
||||||
termsConsumer.finishTerm(term, new TermStats(postings.size(), totalTF));
|
termsConsumer.finishTerm(term, new TermStats(postings.size(), doFreq ? totalTF : -1));
|
||||||
sumTotalTF += totalTF;
|
sumTotalTF += totalTF;
|
||||||
sumDF += postings.size();
|
sumDF += postings.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
termsConsumer.finish(sumTotalTF, sumDF, seenDocs.cardinality());
|
termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
|
||||||
}
|
}
|
||||||
|
|
||||||
fieldsConsumer.close();
|
fieldsConsumer.close();
|
||||||
|
|
|
@ -18,16 +18,23 @@ package org.apache.lucene.codecs.asserting;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.FieldsConsumer;
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.PostingsConsumer;
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.TermStats;
|
||||||
|
import org.apache.lucene.codecs.TermsConsumer;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
|
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
|
||||||
import org.apache.lucene.index.AssertingAtomicReader;
|
import org.apache.lucene.index.AssertingAtomicReader;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
import org.apache.lucene.index.FieldsEnum;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Just like {@link Lucene40PostingsFormat} but with additional asserts.
|
* Just like {@link Lucene40PostingsFormat} but with additional asserts.
|
||||||
|
@ -39,10 +46,9 @@ public class AssertingPostingsFormat extends PostingsFormat {
|
||||||
super("Asserting");
|
super("Asserting");
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: we could add some useful checks here?
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||||
return in.fieldsConsumer(state);
|
return new AssertingFieldsConsumer(in.fieldsConsumer(state));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -85,4 +91,82 @@ public class AssertingPostingsFormat extends PostingsFormat {
|
||||||
return in.getUniqueTermCount();
|
return in.getUniqueTermCount();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static class AssertingFieldsConsumer extends FieldsConsumer {
|
||||||
|
private final FieldsConsumer in;
|
||||||
|
|
||||||
|
AssertingFieldsConsumer(FieldsConsumer in) {
|
||||||
|
this.in = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsConsumer addField(FieldInfo field) throws IOException {
|
||||||
|
TermsConsumer consumer = in.addField(field);
|
||||||
|
assert consumer != null;
|
||||||
|
return new AssertingTermsConsumer(consumer, field);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum TermsConsumerState { INITIAL, START, FINISHED };
|
||||||
|
static class AssertingTermsConsumer extends TermsConsumer {
|
||||||
|
private final TermsConsumer in;
|
||||||
|
private final FieldInfo fieldInfo;
|
||||||
|
private BytesRef lastTerm = null;
|
||||||
|
private TermsConsumerState state = TermsConsumerState.INITIAL;
|
||||||
|
|
||||||
|
AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) {
|
||||||
|
this.in = in;
|
||||||
|
this.fieldInfo = fieldInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: AssertingPostingsConsumer
|
||||||
|
@Override
|
||||||
|
public PostingsConsumer startTerm(BytesRef text) throws IOException {
|
||||||
|
// TODO: assert that if state == START (no finishTerm called), that no actual docs were fed.
|
||||||
|
// TODO: this makes the api really confusing! we should try to clean this up!
|
||||||
|
assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START;
|
||||||
|
state = TermsConsumerState.START;
|
||||||
|
assert lastTerm == null || in.getComparator().compare(text, lastTerm) > 0;
|
||||||
|
lastTerm = BytesRef.deepCopyOf(text);
|
||||||
|
return in.startTerm(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
|
||||||
|
assert state == TermsConsumerState.START;
|
||||||
|
state = TermsConsumerState.INITIAL;
|
||||||
|
assert text.equals(lastTerm);
|
||||||
|
assert stats.docFreq > 0; // otherwise, this method should not be called.
|
||||||
|
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
|
||||||
|
assert stats.totalTermFreq == -1;
|
||||||
|
}
|
||||||
|
in.finishTerm(text, stats);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
|
||||||
|
// TODO: assert that if state == START (no finishTerm called), that no actual docs were fed.
|
||||||
|
// TODO: this makes the api really confusing! we should try to clean this up!
|
||||||
|
assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START;
|
||||||
|
state = TermsConsumerState.FINISHED;
|
||||||
|
assert docCount >= 0;
|
||||||
|
assert sumDocFreq >= docCount;
|
||||||
|
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
|
||||||
|
assert sumTotalTermFreq == -1;
|
||||||
|
} else {
|
||||||
|
assert sumTotalTermFreq >= sumDocFreq;
|
||||||
|
}
|
||||||
|
in.finish(sumTotalTermFreq, sumDocFreq, docCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() throws IOException {
|
||||||
|
return in.getComparator();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue