LUCENE-2880: Make span queries score more consistently with regular queries.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1686301 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-06-18 19:37:05 +00:00
parent 0340838276
commit 81f4c0f1b5
14 changed files with 74 additions and 9 deletions

View File

@ -179,6 +179,9 @@ Changes in Runtime Behavior
* LUCENE-6569: Optimize MultiFunction.anyExists and allExists to eliminate
excessive array creation in common 2 argument usage (Jacob Graves, hossman)
* LUCENE-2880: Span queries now score more consistently with regular queries.
(Robert Muir, Adrien Grand)
Build

View File

@ -47,6 +47,11 @@ abstract class ContainSpans extends ConjunctionSpans {
: sourceSpans.endPosition();
}
@Override
public int width() {
  // The width reported here is exactly what the source spans report.
  final int sourceWidth = sourceSpans.width();
  return sourceWidth;
}
@Override
public void collect(SpanCollector collector) throws IOException {
sourceSpans.collect(collector);

View File

@ -110,6 +110,11 @@ public abstract class FilterSpans extends Spans {
: (startPos != NO_MORE_POSITIONS) ? in.endPosition() : NO_MORE_POSITIONS;
}
@Override
public int width() {
  // Forwards to the wrapped spans `in`; the value is passed through unchanged.
  final int wrappedWidth = in.width();
  return wrappedWidth;
}
@Override
public void collect(SpanCollector collector) throws IOException {
in.collect(collector);

View File

@ -128,6 +128,11 @@ public class NearSpansOrdered extends NearSpans {
return atFirstInCurrentDoc ? -1 : matchEnd;
}
@Override
public int width() {
  // Reports the matchWidth field as-is; its computation happens outside this method.
  final int currentWidth = matchWidth;
  return currentWidth;
}
@Override
public void collect(SpanCollector collector) throws IOException {
for (Spans span : subSpans) {

View File

@ -114,6 +114,11 @@ public class NearSpansUnordered extends NearSpans {
return in.endPosition();
}
@Override
public int width() {
  // Forwards to the wrapped spans `in`; the value is passed through unchanged.
  final int wrappedWidth = in.width();
  return wrappedWidth;
}
@Override
public void collect(SpanCollector collector) throws IOException {
in.collect(collector);
@ -241,6 +246,11 @@ public class NearSpansUnordered extends NearSpans {
: maxEndPositionCell.endPosition();
}
@Override
public int width() {
  // Width = startPosition() of maxEndPositionCell minus startPosition() of minPositionCell().
  // The two reads are kept in the same left-to-right order as the single-expression form.
  final int maxCellStart = maxEndPositionCell.startPosition();
  final int minCellStart = minPositionCell().startPosition();
  return maxCellStart - minCellStart;
}
@Override
public void collect(SpanCollector collector) throws IOException {
for (SpansCell cell : subSpanCells) {

View File

@ -328,6 +328,11 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
}
@Override
public int width() {
  // NOTE(review): topPositionSpans is dereferenced without a null check here, whereas
  // endPosition() and collect() both guard against null. Presumably callers only ask for
  // the width once positioned on a match; confirm before relying on it.
  final int topWidth = topPositionSpans.width();
  return topWidth;
}
@Override
public void collect(SpanCollector collector) throws IOException {
if (topPositionSpans != null)

View File

@ -99,8 +99,7 @@ public class SpanScorer extends Scorer {
freq = 1;
return;
}
int matchLength = endPos - startPos;
freq += docScorer.computeSlopFactor(matchLength);
freq += docScorer.computeSlopFactor(spans.width());
prevStartPos = startPos;
prevEndPos = endPos;
startPos = spans.nextStartPosition();

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.spans;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import java.io.IOException;
@ -49,6 +50,15 @@ public abstract class Spans extends DocIdSetIterator {
*/
public abstract int endPosition();
/**
 * Return the width of the match, which is typically used to compute
 * the {@link SimScorer#computeSlopFactor(int) slop factor}. It is only legal
 * to call this method when the iterator is on a valid doc ID and positioned.
 * The return value must be non-negative ({@code 0} is allowed), and lower
 * values mean that the match is better.
 *
 * @return the non-negative width of the current match
 */
public abstract int width();
/**
* Collect postings data from the leaves of the current Spans.
*

View File

@ -101,6 +101,11 @@ public class TermSpans extends Spans {
: NO_MORE_POSITIONS;
}
@Override
public int width() {
  // This implementation always reports a width of zero.
  final int termWidth = 0;
  return termWidth;
}
@Override
public long cost() {
return postings.cost();

View File

@ -60,6 +60,11 @@ final class JustCompileSearchSpans {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int width() {
  // Always throws; no width is ever returned from this implementation.
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(UNSUPPORTED_MSG);
  throw unsupported;
}
@Override
public void collect(SpanCollector collector) throws IOException {
@ -122,6 +127,11 @@ final class JustCompileSearchSpans {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int width() {
  // Always throws; no width is ever returned from this implementation.
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(UNSUPPORTED_MSG);
  throw unsupported;
}
@Override
public void collect(SpanCollector collector) throws IOException {

View File

@ -39,7 +39,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
/** SpanTermQuery(A) = TermQuery(A) */
public void testSpanTermVersusTerm() throws Exception {
Term t1 = randomTerm();
assertSameSet(new TermQuery(t1), spanQuery(new SpanTermQuery(t1)));
assertSameScores(new TermQuery(t1), spanQuery(new SpanTermQuery(t1)));
}
/** SpanOrQuery(A) = SpanTermQuery(A) */
@ -154,7 +154,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
};
SpanQuery q1 = spanQuery(new SpanNearQuery(subquery, 0, true));
PhraseQuery q2 = new PhraseQuery(t1.field(), t1.bytes(), t2.bytes());
assertSameSet(q1, q2);
assertSameScores(q1, q2);
}
/** SpanNearQuery([A, B], ∞, false) = +A +B */

View File

@ -99,7 +99,7 @@ public class TestSpansAdvanced extends LuceneTestCase {
*/
public void testBooleanQueryWithSpanQueries() throws IOException {
doTestBooleanQueryWithSpanQueries(searcher, 0.3884282f);
doTestBooleanQueryWithSpanQueries(searcher, 0.54932045f);
}
/**

View File

@ -86,8 +86,8 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
final String[] expectedIds = new String[] {"B", "D", "1", "2", "3", "4",
"A"};
final float[] expectedScores = new float[] {0.625f, 0.45927936f,
0.35355338f, 0.35355338f, 0.35355338f, 0.35355338f, 0.26516503f,};
final float[] expectedScores = new float[] {0.8838834f, 0.64951903f,
0.5f, 0.5f, 0.5f, 0.5f, 0.375f,};
assertHits(searcher2, spanQuery, "single span query", expectedIds,
expectedScores);
}
@ -105,7 +105,7 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
final String[] expectedIds = new String[] {"D", "A"};
// these values were pre LUCENE-413
// final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f };
final float[] expectedScores = new float[] {1.0191123f, 0.93163157f};
final float[] expectedScores = new float[] {1.44124233f, 1.31752586f};
assertHits(searcher2, query.build(), "multiple different span queries",
expectedIds, expectedScores);
}
@ -116,6 +116,6 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
@Override
public void testBooleanQueryWithSpanQueries() throws IOException {
doTestBooleanQueryWithSpanQueries(searcher2, 0.73500174f);
doTestBooleanQueryWithSpanQueries(searcher2, 1.0394494f);
}
}

View File

@ -124,6 +124,14 @@ class AssertingSpans extends Spans {
return in.endPosition();
}
@Override
public int width() {
  // Same diagnostic shape as the assertion in collect(): name the method, the offending
  // state and the wrapped spans so a failure is actionable.
  assert state == State.ITERATING : "width() called in illegal state: " + state + ": " + in;
  final int distance = in.width();
  // The wrapped spans must never report a negative width.
  assert distance >= 0 : "width() returned a negative value: " + distance + ": " + in;
  return distance;
}
@Override
public void collect(SpanCollector collector) throws IOException {
assert state == State.ITERATING : "collect() called in illegal state: " + state + ": " + in;