From c0f8cd69a8a8e267305c3d3383bed5616fde4b01 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 23 Jul 2012 19:26:00 +0000 Subject: [PATCH] LUCENE-4248: add producer assertions to Codec API / fix producer inconsistencies git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364763 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/BlockTreeTermsWriter.java | 2 +- .../lucene/codecs/PostingsConsumer.java | 2 +- .../apache/lucene/codecs/TermsConsumer.java | 2 +- .../index/FreqProxTermsWriterPerField.java | 4 +- .../org/apache/lucene/index/TestCodecs.java | 4 +- .../lucene/index/TestPostingsFormat.java | 4 +- .../asserting/AssertingPostingsFormat.java | 88 ++++++++++++++++++- 7 files changed, 95 insertions(+), 11 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java index 80156f869a7..8dc99b3d27d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java @@ -896,7 +896,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer { // w.close(); // } } else { - assert sumTotalTermFreq == 0; + assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1; assert sumDocFreq == 0; assert docCount == 0; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java index f9db84a90ff..e31f4b79343 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java @@ -146,6 +146,6 @@ public abstract class PostingsConsumer { df++; } } - return new TermStats(df, totTF); + return new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java index 30419c95919..4148430f408 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java @@ -205,6 +205,6 @@ public abstract class TermsConsumer { } } } - finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality()); + finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality()); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java index 425f158afce..6a5f1f119bf 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java @@ -542,11 +542,11 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem } postingsConsumer.finishDoc(); } - termsConsumer.finishTerm(text, new TermStats(numDocs, totTF)); + termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1)); sumTotalTermFreq += totTF; sumDocFreq += numDocs; } - termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality()); + termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality()); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index 7f50fa8ed08..8be1027cd0a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -116,7 +116,7 @@ public class TestCodecs extends LuceneTestCase { sumDF += term.docs.length; sumTotalTermCount += term.write(termsConsumer); } - termsConsumer.finish(sumTotalTermCount, sumDF, (int) visitedDocs.cardinality()); + termsConsumer.finish(omitTF ? -1 : sumTotalTermCount, sumDF, (int) visitedDocs.cardinality()); } } @@ -168,7 +168,7 @@ public class TestCodecs extends LuceneTestCase { postingsConsumer.finishDoc(); } } - termsConsumer.finishTerm(text, new TermStats(docs.length, totTF)); + termsConsumer.finishTerm(text, new TermStats(docs.length, field.omitTF ? -1 : totTF)); return totTF; } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java b/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java index c5bdba8ee29..7c55e092df8 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java @@ -428,12 +428,12 @@ public class TestPostingsFormat extends LuceneTestCase { postingsConsumer.finishDoc(); docCount++; } - termsConsumer.finishTerm(term, new TermStats(postings.size(), totalTF)); + termsConsumer.finishTerm(term, new TermStats(postings.size(), doFreq ? totalTF : -1)); sumTotalTF += totalTF; sumDF += postings.size(); } - termsConsumer.finish(sumTotalTF, sumDF, seenDocs.cardinality()); + termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality()); } fieldsConsumer.close(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java index fb33e38deb2..863de992625 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java @@ -18,16 +18,23 @@ package org.apache.lucene.codecs.asserting; */ import java.io.IOException; +import java.util.Comparator; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.PostingsConsumer; import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.TermStats; +import org.apache.lucene.codecs.TermsConsumer; import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; import org.apache.lucene.index.AssertingAtomicReader; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Terms; +import org.apache.lucene.util.BytesRef; /** * Just like {@link Lucene40PostingsFormat} but with additional asserts. @@ -39,10 +46,9 @@ public class AssertingPostingsFormat extends PostingsFormat { super("Asserting"); } - // TODO: we could add some useful checks here? @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - return in.fieldsConsumer(state); + return new AssertingFieldsConsumer(in.fieldsConsumer(state)); } @Override @@ -85,4 +91,82 @@ public class AssertingPostingsFormat extends PostingsFormat { return in.getUniqueTermCount(); } } + + static class AssertingFieldsConsumer extends FieldsConsumer { + private final FieldsConsumer in; + + AssertingFieldsConsumer(FieldsConsumer in) { + this.in = in; + } + + @Override + public TermsConsumer addField(FieldInfo field) throws IOException { + TermsConsumer consumer = in.addField(field); + assert consumer != null; + return new AssertingTermsConsumer(consumer, field); + } + + @Override + public void close() throws IOException { + in.close(); + } + } + + static enum TermsConsumerState { INITIAL, START, FINISHED }; + static class AssertingTermsConsumer extends TermsConsumer { + private final TermsConsumer in; + private final FieldInfo fieldInfo; + private BytesRef lastTerm = null; + private TermsConsumerState state = TermsConsumerState.INITIAL; + + AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) { + this.in = in; + this.fieldInfo = fieldInfo; + } + + // TODO: AssertingPostingsConsumer + @Override + public PostingsConsumer startTerm(BytesRef text) throws IOException { + // TODO: assert that if state == START (no finishTerm called), that no actual docs were fed. + // TODO: this makes the api really confusing! we should try to clean this up! + assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START; + state = TermsConsumerState.START; + assert lastTerm == null || in.getComparator().compare(text, lastTerm) > 0; + lastTerm = BytesRef.deepCopyOf(text); + return in.startTerm(text); + } + + @Override + public void finishTerm(BytesRef text, TermStats stats) throws IOException { + assert state == TermsConsumerState.START; + state = TermsConsumerState.INITIAL; + assert text.equals(lastTerm); + assert stats.docFreq > 0; // otherwise, this method should not be called. + if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) { + assert stats.totalTermFreq == -1; + } + in.finishTerm(text, stats); + } + + @Override + public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException { + // TODO: assert that if state == START (no finishTerm called), that no actual docs were fed. + // TODO: this makes the api really confusing! we should try to clean this up! + assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START; + state = TermsConsumerState.FINISHED; + assert docCount >= 0; + assert sumDocFreq >= docCount; + if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) { + assert sumTotalTermFreq == -1; + } else { + assert sumTotalTermFreq >= sumDocFreq; + } + in.finish(sumTotalTermFreq, sumDocFreq, docCount); + } + + @Override + public Comparator getComparator() throws IOException { + return in.getComparator(); + } + } }