LUCENE-3723: add basic sanity checks for stats in sims

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1235747 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-01-25 13:59:28 +00:00
parent 640741783d
commit 1b9e732bed
4 changed files with 13 additions and 2 deletions

View File

@@ -154,6 +154,7 @@ public class BM25Similarity extends Similarity {
@Override
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
+ assert collectionStats.sumTotalTermFreq() >= -1;
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
float avgdl = avgFieldLength(collectionStats);

View File

@@ -87,6 +87,10 @@ public abstract class SimilarityBase extends Similarity {
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
* Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
+ assert termStats.totalTermFreq() >= -1;
+ assert termStats.totalTermFreq() == -1 || termStats.totalTermFreq() >= termStats.docFreq();
+ assert collectionStats.sumTotalTermFreq() >= -1;
+ assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
int numberOfDocuments = collectionStats.maxDoc();
int docFreq = termStats.docFreq();

View File

@@ -70,8 +70,9 @@ import org.apache.lucene.util.ToStringUtils;
* and 'jones' in position 1). </p>
*
* <p>Note: as {@link #getField()} returns the masked field, scoring will be
- * done using the norms of the field name supplied. This may lead to unexpected
- * scoring behaviour.</p>
+ * done using the Similarity and collection statistics of the field name supplied,
+ * but with the term statistics of the real field. This may lead to exceptions,
+ * poor performance, and unexpected scoring behaviour.</p>
*/
public class FieldMaskingSpanQuery extends SpanQuery {
private SpanQuery maskedQuery;

View File

@@ -31,6 +31,7 @@ import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
+ import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
@@ -240,6 +241,8 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
}
public void testSimple2() throws Exception {
+ assumeTrue("Broken scoring: LUCENE-3723",
+ searcher.getSimilarityProvider().get("id") instanceof TFIDFSimilarity);
SpanQuery q1 = new SpanTermQuery(new Term("gender", "female"));
SpanQuery q2 = new SpanTermQuery(new Term("last", "smith"));
SpanQuery q = new SpanNearQuery(new SpanQuery[]
@@ -310,6 +313,8 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
}
public void testSpans2() throws Exception {
+ assumeTrue("Broken scoring: LUCENE-3723",
+ searcher.getSimilarityProvider().get("id") instanceof TFIDFSimilarity);
SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female"));
SpanQuery qA2 = new SpanTermQuery(new Term("first", "james"));
SpanQuery qA = new SpanOrQuery(qA1, new FieldMaskingSpanQuery(qA2, "gender"));