mirror of https://github.com/apache/lucene.git
LUCENE-2880: Make span queries score more consistently with regular queries.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1686301 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0340838276
commit
81f4c0f1b5
|
@ -179,6 +179,9 @@ Changes in Runtime Behavior
|
|||
* LUCENE-6569: Optimize MultiFunction.anyExists and allExists to eliminate
|
||||
excessive array creation in common 2 argument usage (Jacob Graves, hossman)
|
||||
|
||||
* LUCENE-2880: Span queries now score more consistently with regular queries.
|
||||
(Robert Muir, Adrien Grand)
|
||||
|
||||
|
||||
Build
|
||||
|
||||
|
|
|
@ -47,6 +47,11 @@ abstract class ContainSpans extends ConjunctionSpans {
|
|||
: sourceSpans.endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return sourceSpans.width();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
sourceSpans.collect(collector);
|
||||
|
|
|
@ -110,6 +110,11 @@ public abstract class FilterSpans extends Spans {
|
|||
: (startPos != NO_MORE_POSITIONS) ? in.endPosition() : NO_MORE_POSITIONS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return in.width();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
in.collect(collector);
|
||||
|
|
|
@ -128,6 +128,11 @@ public class NearSpansOrdered extends NearSpans {
|
|||
return atFirstInCurrentDoc ? -1 : matchEnd;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return matchWidth;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
for (Spans span : subSpans) {
|
||||
|
|
|
@ -114,6 +114,11 @@ public class NearSpansUnordered extends NearSpans {
|
|||
return in.endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return in.width();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
in.collect(collector);
|
||||
|
@ -241,6 +246,11 @@ public class NearSpansUnordered extends NearSpans {
|
|||
: maxEndPositionCell.endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return maxEndPositionCell.startPosition() - minPositionCell().startPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
|
|
|
@ -328,6 +328,11 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return topPositionSpans.width();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
if (topPositionSpans != null)
|
||||
|
|
|
@ -99,8 +99,7 @@ public class SpanScorer extends Scorer {
|
|||
freq = 1;
|
||||
return;
|
||||
}
|
||||
int matchLength = endPos - startPos;
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
freq += docScorer.computeSlopFactor(spans.width());
|
||||
prevStartPos = startPos;
|
||||
prevEndPos = endPos;
|
||||
startPos = spans.nextStartPosition();
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search.spans;
|
|||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
@ -49,6 +50,15 @@ public abstract class Spans extends DocIdSetIterator {
|
|||
*/
|
||||
public abstract int endPosition();
|
||||
|
||||
/**
|
||||
* Return the width of the match, which is typically used to compute
|
||||
* the {@link SimScorer#computeSlopFactor(int) slop factor}. It is only legal
|
||||
* to call this method when the iterator is on a valid doc ID and positioned.
|
||||
* The return value must be non-negative, and lower values mean that the match is
|
||||
* better.
|
||||
*/
|
||||
public abstract int width();
|
||||
|
||||
/**
|
||||
* Collect postings data from the leaves of the current Spans.
|
||||
*
|
||||
|
|
|
@ -101,6 +101,11 @@ public class TermSpans extends Spans {
|
|||
: NO_MORE_POSITIONS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return postings.cost();
|
||||
|
|
|
@ -60,6 +60,11 @@ final class JustCompileSearchSpans {
|
|||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
|
||||
|
@ -122,6 +127,11 @@ final class JustCompileSearchSpans {
|
|||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
|
|||
/** SpanTermQuery(A) = TermQuery(A) */
|
||||
public void testSpanTermVersusTerm() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
assertSameSet(new TermQuery(t1), spanQuery(new SpanTermQuery(t1)));
|
||||
assertSameScores(new TermQuery(t1), spanQuery(new SpanTermQuery(t1)));
|
||||
}
|
||||
|
||||
/** SpanOrQuery(A) = SpanTermQuery(A) */
|
||||
|
@ -154,7 +154,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
|
|||
};
|
||||
SpanQuery q1 = spanQuery(new SpanNearQuery(subquery, 0, true));
|
||||
PhraseQuery q2 = new PhraseQuery(t1.field(), t1.bytes(), t2.bytes());
|
||||
assertSameSet(q1, q2);
|
||||
assertSameScores(q1, q2);
|
||||
}
|
||||
|
||||
/** SpanNearQuery([A, B], ∞, false) = +A +B */
|
||||
|
|
|
@ -99,7 +99,7 @@ public class TestSpansAdvanced extends LuceneTestCase {
|
|||
*/
|
||||
public void testBooleanQueryWithSpanQueries() throws IOException {
|
||||
|
||||
doTestBooleanQueryWithSpanQueries(searcher, 0.3884282f);
|
||||
doTestBooleanQueryWithSpanQueries(searcher, 0.54932045f);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -86,8 +86,8 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
|||
final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
|
||||
final String[] expectedIds = new String[] {"B", "D", "1", "2", "3", "4",
|
||||
"A"};
|
||||
final float[] expectedScores = new float[] {0.625f, 0.45927936f,
|
||||
0.35355338f, 0.35355338f, 0.35355338f, 0.35355338f, 0.26516503f,};
|
||||
final float[] expectedScores = new float[] {0.8838834f, 0.64951903f,
|
||||
0.5f, 0.5f, 0.5f, 0.5f, 0.375f,};
|
||||
assertHits(searcher2, spanQuery, "single span query", expectedIds,
|
||||
expectedScores);
|
||||
}
|
||||
|
@ -105,7 +105,7 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
|||
final String[] expectedIds = new String[] {"D", "A"};
|
||||
// these values were pre LUCENE-413
|
||||
// final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f };
|
||||
final float[] expectedScores = new float[] {1.0191123f, 0.93163157f};
|
||||
final float[] expectedScores = new float[] {1.44124233f, 1.31752586f};
|
||||
assertHits(searcher2, query.build(), "multiple different span queries",
|
||||
expectedIds, expectedScores);
|
||||
}
|
||||
|
@ -116,6 +116,6 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
|||
@Override
|
||||
public void testBooleanQueryWithSpanQueries() throws IOException {
|
||||
|
||||
doTestBooleanQueryWithSpanQueries(searcher2, 0.73500174f);
|
||||
doTestBooleanQueryWithSpanQueries(searcher2, 1.0394494f);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -124,6 +124,14 @@ class AssertingSpans extends Spans {
|
|||
return in.endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
assert state == State.ITERATING;
|
||||
final int distance = in.width();
|
||||
assert distance >= 0;
|
||||
return distance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
assert state == State.ITERATING : "collect() called in illegal state: " + state + ": " + in;
|
||||
|
|
Loading…
Reference in New Issue