mirror of https://github.com/apache/lucene.git
LUCENE-3723: add basic sanity checks for statistics in Similarity implementations
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1235747 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
640741783d
commit
1b9e732bed
|
@ -154,6 +154,7 @@ public class BM25Similarity extends Similarity {
|
|||
|
||||
@Override
|
||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
assert collectionStats.sumTotalTermFreq() >= -1;
|
||||
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
||||
|
||||
float avgdl = avgFieldLength(collectionStats);
|
||||
|
|
|
@ -87,6 +87,10 @@ public abstract class SimilarityBase extends Similarity {
|
|||
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
||||
* Subclasses can override this method to fill additional stats. */
|
||||
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
assert termStats.totalTermFreq() >= -1;
|
||||
assert termStats.totalTermFreq() == -1 || termStats.totalTermFreq() >= termStats.docFreq();
|
||||
assert collectionStats.sumTotalTermFreq() >= -1;
|
||||
assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
|
||||
int numberOfDocuments = collectionStats.maxDoc();
|
||||
|
||||
int docFreq = termStats.docFreq();
|
||||
|
|
|
@ -70,8 +70,9 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
* and 'jones' in position 1). </p>
|
||||
*
|
||||
* <p>Note: as {@link #getField()} returns the masked field, scoring will be
|
||||
- * done using the norms of the field name supplied. This may lead to unexpected
|
||||
- * scoring behaviour.</p>
|
||||
+ * done using the Similarity and collection statistics of the field name supplied,
|
||||
+ * but with the term statistics of the real field. This may lead to exceptions,
|
||||
+ * poor performance, and unexpected scoring behaviour.</p>
|
||||
*/
|
||||
public class FieldMaskingSpanQuery extends SpanQuery {
|
||||
private SpanQuery maskedQuery;
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.search.CheckHits;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -240,6 +241,8 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSimple2() throws Exception {
|
||||
assumeTrue("Broken scoring: LUCENE-3723",
|
||||
searcher.getSimilarityProvider().get("id") instanceof TFIDFSimilarity);
|
||||
SpanQuery q1 = new SpanTermQuery(new Term("gender", "female"));
|
||||
SpanQuery q2 = new SpanTermQuery(new Term("last", "smith"));
|
||||
SpanQuery q = new SpanNearQuery(new SpanQuery[]
|
||||
|
@ -310,6 +313,8 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSpans2() throws Exception {
|
||||
assumeTrue("Broken scoring: LUCENE-3723",
|
||||
searcher.getSimilarityProvider().get("id") instanceof TFIDFSimilarity);
|
||||
SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female"));
|
||||
SpanQuery qA2 = new SpanTermQuery(new Term("first", "james"));
|
||||
SpanQuery qA = new SpanOrQuery(qA1, new FieldMaskingSpanQuery(qA2, "gender"));
|
||||
|
|
Loading…
Reference in New Issue