mirror of https://github.com/apache/lucene.git
LUCENE-3723: add basic sanity checks for stats in sims
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1235747 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
640741783d
commit
1b9e732bed
|
@ -154,6 +154,7 @@ public class BM25Similarity extends Similarity {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||||
|
assert collectionStats.sumTotalTermFreq() >= -1;
|
||||||
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
||||||
|
|
||||||
float avgdl = avgFieldLength(collectionStats);
|
float avgdl = avgFieldLength(collectionStats);
|
||||||
|
|
|
@ -87,6 +87,10 @@ public abstract class SimilarityBase extends Similarity {
|
||||||
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
||||||
* Subclasses can override this method to fill additional stats. */
|
* Subclasses can override this method to fill additional stats. */
|
||||||
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
|
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||||
|
assert termStats.totalTermFreq() >= -1;
|
||||||
|
assert termStats.totalTermFreq() == -1 || termStats.totalTermFreq() >= termStats.docFreq();
|
||||||
|
assert collectionStats.sumTotalTermFreq() >= -1;
|
||||||
|
assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
|
||||||
int numberOfDocuments = collectionStats.maxDoc();
|
int numberOfDocuments = collectionStats.maxDoc();
|
||||||
|
|
||||||
int docFreq = termStats.docFreq();
|
int docFreq = termStats.docFreq();
|
||||||
|
|
|
@ -70,8 +70,9 @@ import org.apache.lucene.util.ToStringUtils;
|
||||||
* and 'jones' in position 1). </p>
|
* and 'jones' in position 1). </p>
|
||||||
*
|
*
|
||||||
* <p>Note: as {@link #getField()} returns the masked field, scoring will be
|
* <p>Note: as {@link #getField()} returns the masked field, scoring will be
|
||||||
* done using the norms of the field name supplied. This may lead to unexpected
|
* done using the Similarity and collection statistics of the field name supplied,
|
||||||
* scoring behaviour.</p>
|
* but with the term statistics of the real field. This may lead to exceptions,
|
||||||
|
* poor performance, and unexpected scoring behaviour.</p>
|
||||||
*/
|
*/
|
||||||
public class FieldMaskingSpanQuery extends SpanQuery {
|
public class FieldMaskingSpanQuery extends SpanQuery {
|
||||||
private SpanQuery maskedQuery;
|
private SpanQuery maskedQuery;
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.search.CheckHits;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryUtils;
|
import org.apache.lucene.search.QueryUtils;
|
||||||
|
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
|
@ -240,6 +241,8 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimple2() throws Exception {
|
public void testSimple2() throws Exception {
|
||||||
|
assumeTrue("Broken scoring: LUCENE-3723",
|
||||||
|
searcher.getSimilarityProvider().get("id") instanceof TFIDFSimilarity);
|
||||||
SpanQuery q1 = new SpanTermQuery(new Term("gender", "female"));
|
SpanQuery q1 = new SpanTermQuery(new Term("gender", "female"));
|
||||||
SpanQuery q2 = new SpanTermQuery(new Term("last", "smith"));
|
SpanQuery q2 = new SpanTermQuery(new Term("last", "smith"));
|
||||||
SpanQuery q = new SpanNearQuery(new SpanQuery[]
|
SpanQuery q = new SpanNearQuery(new SpanQuery[]
|
||||||
|
@ -310,6 +313,8 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSpans2() throws Exception {
|
public void testSpans2() throws Exception {
|
||||||
|
assumeTrue("Broken scoring: LUCENE-3723",
|
||||||
|
searcher.getSimilarityProvider().get("id") instanceof TFIDFSimilarity);
|
||||||
SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female"));
|
SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female"));
|
||||||
SpanQuery qA2 = new SpanTermQuery(new Term("first", "james"));
|
SpanQuery qA2 = new SpanTermQuery(new Term("first", "james"));
|
||||||
SpanQuery qA = new SpanOrQuery(qA1, new FieldMaskingSpanQuery(qA2, "gender"));
|
SpanQuery qA = new SpanOrQuery(qA1, new FieldMaskingSpanQuery(qA2, "gender"));
|
||||||
|
|
Loading…
Reference in New Issue