mirror of https://github.com/apache/lucene.git
LUCENE-2590: Enable access to the freq information in a Query's sub-scorers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@991310 13f79535-47bb-0310-9956-ffa450edef68
parent d97f1f25d6
commit 5fe8094027
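For context, the new API added in this commit is meant to be driven from a custom Collector: grab the per-clause Scorers once per segment via Scorer.visitScorers(ScorerVisitor), then read Scorer.freq() for every sub-scorer that is positioned on the hit currently being collected. The following is a minimal sketch, condensed from the TestSubScorerFreqs test added at the end of this diff; the PerClauseFreqCollector class and field names are illustrative, not part of the commit.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Scorer.ScorerVisitor;

/** Sketch: records, for every collected hit, how often each sub-query matched. */
class PerClauseFreqCollector extends Collector {
  final Map<Integer, Map<Query, Float>> freqsPerDoc = new HashMap<Integer, Map<Query, Float>>();
  private final Map<Query, Scorer> subScorers = new HashMap<Query, Scorer>();
  private int docBase;

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    subScorers.clear();
    // Walk the scorer tree once per segment; the visitor is handed every
    // sub-scorer together with its query and MUST/SHOULD/MUST_NOT relationship.
    scorer.visitScorers(new ScorerVisitor<Query, Query, Scorer>() {
      @Override
      public void visitOptional(Query parent, Query child, Scorer s) { subScorers.put(child, s); }
      @Override
      public void visitRequired(Query parent, Query child, Scorer s) { subScorers.put(child, s); }
      @Override
      public void visitProhibited(Query parent, Query child, Scorer s) { subScorers.put(child, s); }
    });
  }

  @Override
  public void collect(int doc) throws IOException {
    Map<Query, Float> freqs = new HashMap<Query, Float>();
    for (Map.Entry<Query, Scorer> ent : subScorers.entrySet()) {
      Scorer sub = ent.getValue();
      // A sub-scorer only matched this hit if it is positioned on the same doc.
      freqs.put(ent.getKey(), sub.docID() == doc ? sub.freq() : 0.0f);
    }
    freqsPerDoc.put(docBase + doc, freqs);
  }

  @Override
  public void setNextReader(IndexReader reader, int docBase) { this.docBase = docBase; }

  @Override
  public boolean acceptsDocsOutOfOrder() { return true; }
}

Note that freq() returns a float rather than an int because SloppyPhraseScorer discounts its frequency according to how sloppy the match was.
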
@@ -208,6 +208,11 @@ New features
 * LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike
   McCandless)

+* LUCENE-2590: Added Scorer.visitSubScorers, and Scorer.freq. Along
+  with a custom Collector these experimental methods make it possible
+  to gather the hit-count per sub-clause and per document while a
+  search is running. (Simon Willnauer, Mike McCandless)
+
 Optimizations

 * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
@@ -320,7 +320,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {

       // Check if we can return a BooleanScorer
       if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
-        return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited, maxCoord);
+        return new BooleanScorer(this, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
       }

       if (required.size() == 0 && optional.size() == 0) {
@@ -334,7 +334,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
       }

       // Return a BooleanScorer2
-      return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
+      return new BooleanScorer2(this, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
     }

     @Override
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.List;

 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.BooleanClause.Occur;

 /* Description from Doug Cutting (excerpted from
  * LUCENE-1483):
@@ -115,6 +116,7 @@ final class BooleanScorer extends Scorer {

     float score;
     int doc = NO_MORE_DOCS;
+    int freq;

     public BucketScorer() { super(null); }
@@ -124,6 +126,9 @@ final class BooleanScorer extends Scorer {
     @Override
     public int docID() { return doc; }

+    @Override
+    public float freq() { return freq; }
+
     @Override
     public int nextDoc() throws IOException { return NO_MORE_DOCS; }
@@ -159,7 +164,8 @@ final class BooleanScorer extends Scorer {

   static final class SubScorer {
     public Scorer scorer;
-    public boolean required = false;
+    // TODO: re-enable this if BQ ever sends us required clauses
+    //public boolean required = false;
     public boolean prohibited = false;
     public Collector collector;
     public SubScorer next;
@@ -167,8 +173,12 @@ final class BooleanScorer extends Scorer {
     public SubScorer(Scorer scorer, boolean required, boolean prohibited,
         Collector collector, SubScorer next)
       throws IOException {
+      if (required) {
+        throw new IllegalArgumentException("this scorer cannot handle required=true");
+      }
       this.scorer = scorer;
-      this.required = required;
+      // TODO: re-enable this if BQ ever sends us required clauses
+      //this.required = required;
       this.prohibited = prohibited;
       this.collector = collector;
       this.next = next;
@@ -178,17 +188,18 @@ final class BooleanScorer extends Scorer {
   private SubScorer scorers = null;
   private BucketTable bucketTable = new BucketTable();
   private final float[] coordFactors;
-  private int requiredMask = 0;
+  // TODO: re-enable this if BQ ever sends us required clauses
+  //private int requiredMask = 0;
   private int prohibitedMask = 0;
   private int nextMask = 1;
   private final int minNrShouldMatch;
   private int end;
   private Bucket current;
   private int doc = -1;

-  BooleanScorer(Similarity similarity, int minNrShouldMatch,
+  BooleanScorer(Weight weight, Similarity similarity, int minNrShouldMatch,
       List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
-    super(similarity);
+    super(similarity, weight);
     this.minNrShouldMatch = minNrShouldMatch;

     if (optionalScorers != null && optionalScorers.size() > 0) {
@@ -231,8 +242,11 @@ final class BooleanScorer extends Scorer {
       while (current != null) { // more queued

         // check prohibited & required
-        if ((current.bits & prohibitedMask) == 0 &&
-            (current.bits & requiredMask) == requiredMask) {
+        if ((current.bits & prohibitedMask) == 0) {

+          // TODO: re-enable this if BQ ever sends us required
+          // clauses
+          //&& (current.bits & requiredMask) == requiredMask) {

           if (current.doc >= max){
             tmp = current;
@@ -245,6 +259,7 @@ final class BooleanScorer extends Scorer {
           if (current.coord >= minNrShouldMatch) {
             bs.score = current.score * coordFactors[current.coord];
             bs.doc = current.doc;
+            bs.freq = current.coord;
             collector.collect(current.doc);
           }
         }
@@ -294,8 +309,9 @@ final class BooleanScorer extends Scorer {

       // check prohibited & required, and minNrShouldMatch
       if ((current.bits & prohibitedMask) == 0 &&
-          (current.bits & requiredMask) == requiredMask &&
+          // TODO: re-enable this if BQ ever sends us required clauses
+          // (current.bits & requiredMask) == requiredMask &&
           current.coord >= minNrShouldMatch) {
         return doc = current.doc;
       }
     }
@@ -339,5 +355,28 @@ final class BooleanScorer extends Scorer {
     buffer.append(")");
     return buffer.toString();
   }
+
+  @Override
+  protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
+    super.visitSubScorers(parent, relationship, visitor);
+    final Query q = weight.getQuery();
+    SubScorer sub = scorers;
+    while(sub != null) {
+      // TODO: re-enable this if BQ ever sends us required
+      //clauses
+      //if (sub.required) {
+      //relationship = Occur.MUST;
+      if (!sub.prohibited) {
+        relationship = Occur.SHOULD;
+      } else {
+        // TODO: maybe it's pointless to do this, but, it is
+        // possible the doc may still be collected, eg foo
+        // OR (bar -fee)
+        relationship = Occur.MUST_NOT;
+      }
+      sub.scorer.visitSubScorers(q, relationship, visitor);
+      sub = sub.next;
+    }
+  }
 }
@@ -21,6 +21,8 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;

+import org.apache.lucene.search.BooleanClause.Occur;

 /* See the description in BooleanScorer.java, comparing
  * BooleanScorer & BooleanScorer2 */
@@ -59,7 +61,7 @@ class BooleanScorer2 extends Scorer {

   /** The number of optionalScorers that need to match (if there are any) */
   private final int minNrShouldMatch;


   private int doc = -1;

   /**
@@ -80,9 +82,9 @@ class BooleanScorer2 extends Scorer {
    * @param optional
    *          the list of optional scorers.
    */
-  public BooleanScorer2(Similarity similarity, int minNrShouldMatch,
+  public BooleanScorer2(Weight weight, Similarity similarity, int minNrShouldMatch,
       List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException {
-    super(similarity);
+    super(similarity, weight);
     if (minNrShouldMatch < 0) {
       throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
     }
@@ -301,10 +303,28 @@ class BooleanScorer2 extends Scorer {
     return sum * coordinator.coordFactors[coordinator.nrMatchers];
   }

+  @Override
+  public float freq() {
+    return coordinator.nrMatchers;
+  }
+
   @Override
   public int advance(int target) throws IOException {
     return doc = countingSumScorer.advance(target);
   }
+
+  @Override
+  protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
+    super.visitSubScorers(parent, relationship, visitor);
+    final Query q = weight.getQuery();
+    for (Scorer s : optionalScorers) {
+      s.visitSubScorers(q, Occur.SHOULD, visitor);
+    }
+    for (Scorer s : prohibitedScorers) {
+      s.visitSubScorers(q, Occur.MUST_NOT, visitor);
+    }
+    for (Scorer s : requiredScorers) {
+      s.visitSubScorers(q, Occur.MUST, visitor);
+    }
+  }
 }
@@ -123,7 +123,7 @@ public class ConstantScoreQuery extends Query {
     int doc = -1;

     public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException {
-      super(similarity);
+      super(similarity,w);
       theScore = w.getValue();
       DocIdSet docIdSet = filter.getDocIdSet(reader);
       if (docIdSet == null) {
@@ -21,9 +21,9 @@ import java.io.IOException;
 import java.util.Arrays;

 import org.apache.lucene.index.*;
+import org.apache.lucene.search.BooleanClause.Occur;

 final class ExactPhraseScorer extends Scorer {
-  private final Weight weight;
   private final byte[] norms;
   private final float value;
@@ -63,8 +63,7 @@ final class ExactPhraseScorer extends Scorer {

   ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
                     Similarity similarity, byte[] norms) throws IOException {
-    super(similarity);
-    this.weight = weight;
+    super(similarity, weight);
     this.norms = norms;
     this.value = weight.getValue();
@@ -193,8 +192,8 @@ final class ExactPhraseScorer extends Scorer {
     return "ExactPhraseScorer(" + weight + ")";
   }

-  // used by MultiPhraseQuery
-  float currentFreq() {
+  @Override
+  public float freq() {
     return freq;
   }
@@ -126,7 +126,7 @@ extends Query {
         return null;
       }

-      return new Scorer(similarity) {
+      return new Scorer(similarity, this) {

         private int doc = -1;
@@ -54,7 +54,7 @@ public class MatchAllDocsQuery extends Query {

     MatchAllScorer(IndexReader reader, Similarity similarity, Weight w,
         byte[] norms) throws IOException {
-      super(similarity);
+      super(similarity,w);
       delDocs = MultiFields.getDeletedDocs(reader);
       score = w.getValue();
       maxDoc = reader.maxDoc();
@@ -271,11 +271,7 @@ public class MultiPhraseQuery extends Query {
         int d = scorer.advance(doc);
         float phraseFreq;
         if (d == doc) {
-          if (slop == 0) {
-            phraseFreq = ((ExactPhraseScorer) scorer).currentFreq();
-          } else {
-            phraseFreq = ((SloppyPhraseScorer) scorer).currentFreq();
-          }
+          phraseFreq = scorer.freq();
         } else {
           phraseFreq = 0.0f;
         }
@@ -275,11 +275,7 @@ public class PhraseQuery extends Query {
         int d = scorer.advance(doc);
         float phraseFreq;
         if (d == doc) {
-          if (slop == 0) {
-            phraseFreq = ((ExactPhraseScorer) scorer).currentFreq();
-          } else {
-            phraseFreq = ((SloppyPhraseScorer) scorer).currentFreq();
-          }
+          phraseFreq = scorer.freq();
         } else {
           phraseFreq = 0.0f;
         }
@@ -19,6 +19,8 @@ package org.apache.lucene.search;

 import java.io.IOException;

+import org.apache.lucene.search.BooleanClause.Occur;
+
 /** Expert: Scoring functionality for phrase queries.
  * <br>A document is considered matching if it contains the phrase-query terms
  * at "valid" positions. What "valid positions" are
@@ -30,7 +32,6 @@ import java.io.IOException;
  * means a match.
  */
 abstract class PhraseScorer extends Scorer {
-  private Weight weight;
   protected byte[] norms;
   protected float value;
@@ -43,9 +44,8 @@ abstract class PhraseScorer extends Scorer {

   PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
       Similarity similarity, byte[] norms) {
-    super(similarity);
+    super(similarity, weight);
     this.norms = norms;
-    this.weight = weight;
     this.value = weight.getValue();

     // convert tps to a list of phrase positions.
@@ -129,8 +129,11 @@ abstract class PhraseScorer extends Scorer {
   /**
    * phrase frequency in current doc as computed by phraseFreq().
    */
-  public final float currentFreq() { return freq; }
+  @Override
+  public final float freq() {
+    return freq;
+  }

   /**
    * For a document containing all the phrase query terms, compute the
    * frequency of the phrase in that document.
@@ -179,5 +182,5 @@ abstract class PhraseScorer extends Scorer {

   @Override
   public String toString() { return "scorer(" + weight + ")"; }

 }
@@ -32,7 +32,7 @@ import java.io.IOException;
  */
 public class ScoreCachingWrappingScorer extends Scorer {

-  private Scorer scorer;
+  private final Scorer scorer;
   private int curDoc = -1;
   private float curScore;
@@ -19,6 +19,8 @@ package org.apache.lucene.search;

 import java.io.IOException;

+import org.apache.lucene.search.BooleanClause.Occur;
+
 /**
  * Expert: Common scoring functionality for different types of queries.
  *
@@ -39,12 +41,23 @@ import java.io.IOException;
  */
 public abstract class Scorer extends DocIdSetIterator {
   private final Similarity similarity;
+  protected final Weight weight;

   /** Constructs a Scorer.
    * @param similarity The <code>Similarity</code> implementation used by this scorer.
    */
   protected Scorer(Similarity similarity) {
+    this(similarity, null);
+  }
+
+  /**
+   * Constructs a Scorer
+   * @param similarity The <code>Similarity</code> implementation used by this scorer.
+   * @param weight The scorers <code>Weight</code>
+   */
+  protected Scorer(Similarity similarity, Weight weight) {
     this.similarity = similarity;
+    this.weight = weight;
   }

   /** Returns the Similarity implementation used by this scorer. */
@@ -94,4 +107,92 @@ public abstract class Scorer extends DocIdSetIterator {
    */
   public abstract float score() throws IOException;

+  /** Returns number of matches for the current document.
+   *  This returns a float (not int) because
+   *  SloppyPhraseScorer discounts its freq according to how
+   *  "sloppy" the match was.
+   *
+   * @lucene.experimental */
+  public float freq() throws IOException {
+    throw new UnsupportedOperationException(this + " does not implement freq()");
+  }
+
+  /**
+   * A callback to gather information from a scorer and its sub-scorers. Each
+   * the top-level scorer as well as each of its sub-scorers are passed to
+   * either one of the visit methods depending on their boolean relationship in
+   * the query.
+   * @lucene.experimental
+   */
+  public static abstract class ScorerVisitor<P extends Query, C extends Query, S extends Scorer> {
+    /**
+     * Invoked for all optional scorer
+     *
+     * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
+     * @param child the query of the currently visited scorer
+     * @param scorer the current scorer
+     */
+    public void visitOptional(P parent, C child, S scorer) {}
+
+    /**
+     * Invoked for all required scorer
+     *
+     * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
+     * @param child the query of the currently visited scorer
+     * @param scorer the current scorer
+     */
+    public void visitRequired(P parent, C child, S scorer) {}
+
+    /**
+     * Invoked for all prohibited scorer
+     *
+     * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
+     * @param child the query of the currently visited scorer
+     * @param scorer the current scorer
+     */
+    public void visitProhibited(P parent, C child, S scorer) {}
+  }
+
+  /**
+   * Expert: call this to gather details for all sub-scorers for this query.
+   * This can be used, in conjunction with a custom {@link Collector} to gather
+   * details about how each sub-query matched the current hit.
+   *
+   * @param visitor a callback executed for each sub-scorer
+   * @lucene.experimental
+   */
+  public void visitScorers(ScorerVisitor<Query, Query, Scorer> visitor) {
+    visitSubScorers(null, Occur.MUST/*must id default*/, visitor);
+  }
+
+  /**
+   * {@link Scorer} subclasses should implement this method if the subclass
+   * itself contains multiple scorers to support gathering details for
+   * sub-scorers via {@link ScorerVisitor}
+   * <p>
+   * Note: this method will throw {@link UnsupportedOperationException} if no
+   * associated {@link Weight} instance is provided to
+   * {@link #Scorer(Similarity, Weight)}
+   * </p>
+   *
+   * @lucene.experimental
+   */
+  protected void visitSubScorers(Query parent, Occur relationship,
+      ScorerVisitor<Query, Query, Scorer> visitor) {
+    if (weight == null)
+      throw new UnsupportedOperationException();
+
+    final Query q = weight.getQuery();
+    switch (relationship) {
+    case MUST:
+      visitor.visitRequired(parent, q, this);
+      break;
+    case MUST_NOT:
+      visitor.visitProhibited(parent, q, this);
+      break;
+    case SHOULD:
+      visitor.visitOptional(parent, q, this);
+      break;
+    }
+  }
 }
@@ -20,11 +20,11 @@ package org.apache.lucene.search;
 import java.io.IOException;

 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.BooleanClause.Occur;

 /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
  */
 final class TermScorer extends Scorer {
-  private Weight weight;
   private DocsEnum docsEnum;
   private byte[] norms;
   private float weightValue;
@@ -54,9 +54,8 @@ final class TermScorer extends Scorer {
    * The field norms of the document fields for the <code>Term</code>.
    */
   TermScorer(Weight weight, DocsEnum td, Similarity similarity, byte[] norms) {
-    super(similarity);
+    super(similarity, weight);

-    this.weight = weight;
     this.docsEnum = td;
     this.norms = norms;
     this.weightValue = weight.getValue();
@@ -103,6 +102,11 @@ final class TermScorer extends Scorer {
     return doc;
   }

+  @Override
+  public float freq() {
+    return freq;
+  }
+
   /**
    * Advances to the next document matching the query. <br>
    * The iterator over the matching documents is buffered using
@@ -172,4 +176,5 @@ final class TermScorer extends Scorer {
   /** Returns a string representation of this <code>TermScorer</code>. */
   @Override
   public String toString() { return "scorer(" + weight + ")"; }
+
 }
@@ -300,7 +300,7 @@ public class CustomScoreQuery extends Query {
     // constructor
     private CustomScorer(Similarity similarity, IndexReader reader, CustomWeight w,
         Scorer subQueryScorer, Scorer[] valSrcScorers) throws IOException {
-      super(similarity);
+      super(similarity,w);
       this.qWeight = w.getValue();
       this.subQueryScorer = subQueryScorer;
       this.valSrcScorers = valSrcScorers;
@@ -134,7 +134,7 @@ public class ValueSourceQuery extends Query {

     // constructor
     private ValueSourceScorer(Similarity similarity, IndexReader reader, ValueSourceWeight w) throws IOException {
-      super(similarity);
+      super(similarity,w);
       qWeight = w.getValue();
       // this is when/where the values are first created.
       vals = valSrc.getValues(reader);
@@ -29,7 +29,6 @@ import org.apache.lucene.search.Similarity;
  */
 public class SpanScorer extends Scorer {
   protected Spans spans;
-  protected Weight weight;
   protected byte[] norms;
   protected float value;
@@ -40,10 +39,9 @@ public class SpanScorer extends Scorer {

   protected SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
   throws IOException {
-    super(similarity);
+    super(similarity, weight);
     this.spans = spans;
     this.norms = norms;
-    this.weight = weight;
     this.value = weight.getValue();
     if (this.spans.next()) {
       doc = -1;
@@ -97,6 +95,11 @@ public class SpanScorer extends Scorer {
     float raw = getSimilarity().tf(freq) * value; // raw score
     return norms == null? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize
   }

+  @Override
+  public float freq() throws IOException {
+    return freq;
+  }
+
   /** This method is no longer an official member of {@link Scorer},
    * but it is needed by SpanWeight to build an explanation. */
@@ -90,7 +90,7 @@ public class TestBooleanScorer extends LuceneTestCase
       }

     }};
-    BooleanScorer bs = new BooleanScorer(sim, 1, Arrays.asList(scorers), null, scorers.length);
+    BooleanScorer bs = new BooleanScorer(null, sim, 1, Arrays.asList(scorers), null, scorers.length);

     assertEquals("should have received 3000", 3000, bs.nextDoc());
     assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc());
@@ -0,0 +1,226 @@
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.util.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Scorer.ScorerVisitor;
import org.apache.lucene.store.*;

import java.util.*;
import java.io.*;

import org.junit.Test;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import static org.junit.Assert.*;

public class TestSubScorerFreqs extends LuceneTestCaseJ4 {

  private static Directory dir;
  private static IndexSearcher s;

  @BeforeClass
  public static void makeIndex() throws Exception {
    dir = new RAMDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        newStaticRandom(TestSubScorerFreqs.class), dir);
    // make sure we have more than one segment occationally
    for (int i = 0; i < 31 * RANDOM_MULTIPLIER; i++) {
      Document doc = new Document();
      doc.add(new Field("f", "a b c d b c d c d d", Field.Store.NO,
          Field.Index.ANALYZED));
      w.addDocument(doc);

      doc = new Document();
      doc.add(new Field("f", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
      w.addDocument(doc);
    }

    s = new IndexSearcher(w.getReader());
    w.close();
  }

  @AfterClass
  public static void finish() throws Exception {
    s.getIndexReader().close();
    s.close();
    dir.close();
  }

  private static class CountingCollector extends Collector {
    private final Collector other;
    private int docBase;

    public final Map<Integer, Map<Query, Float>> docCounts = new HashMap<Integer, Map<Query, Float>>();

    private final Map<Query, Scorer> subScorers = new HashMap<Query, Scorer>();
    private final ScorerVisitor<Query, Query, Scorer> visitor = new MockScorerVisitor();
    private final EnumSet<Occur> collect;

    private class MockScorerVisitor extends ScorerVisitor<Query, Query, Scorer> {

      @Override
      public void visitOptional(Query parent, Query child, Scorer scorer) {
        if (collect.contains(Occur.SHOULD))
          subScorers.put(child, scorer);
      }

      @Override
      public void visitProhibited(Query parent, Query child, Scorer scorer) {
        if (collect.contains(Occur.MUST_NOT))
          subScorers.put(child, scorer);
      }

      @Override
      public void visitRequired(Query parent, Query child, Scorer scorer) {
        if (collect.contains(Occur.MUST))
          subScorers.put(child, scorer);
      }

    }

    public CountingCollector(Collector other) {
      this(other, EnumSet.allOf(Occur.class));
    }

    public CountingCollector(Collector other, EnumSet<Occur> collect) {
      this.other = other;
      this.collect = collect;
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      other.setScorer(scorer);
      scorer.visitScorers(visitor);
    }

    @Override
    public void collect(int doc) throws IOException {
      final Map<Query, Float> freqs = new HashMap<Query, Float>();
      for (Map.Entry<Query, Scorer> ent : subScorers.entrySet()) {
        Scorer value = ent.getValue();
        int matchId = value.docID();
        freqs.put(ent.getKey(), matchId == doc ? value.freq() : 0.0f);
      }
      docCounts.put(doc + docBase, freqs);
      other.collect(doc);
    }

    @Override
    public void setNextReader(IndexReader reader, int docBase)
        throws IOException {
      this.docBase = docBase;
      other.setNextReader(reader, docBase);
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
      return other.acceptsDocsOutOfOrder();
    }
  }

  private static final float FLOAT_TOLERANCE = 0.00001F;

  @Test
  public void testTermQuery() throws Exception {
    TermQuery q = new TermQuery(new Term("f", "d"));
    CountingCollector c = new CountingCollector(TopScoreDocCollector.create(10,
        true));
    s.search(q, null, c);
    final int maxDocs = s.maxDoc();
    assertEquals(maxDocs, c.docCounts.size());
    for (int i = 0; i < maxDocs; i++) {
      Map<Query, Float> doc0 = c.docCounts.get(i);
      assertEquals(1, doc0.size());
      assertEquals(4.0F, doc0.get(q), FLOAT_TOLERANCE);

      Map<Query, Float> doc1 = c.docCounts.get(++i);
      assertEquals(1, doc1.size());
      assertEquals(1.0F, doc1.get(q), FLOAT_TOLERANCE);
    }
  }

  @SuppressWarnings("unchecked")
  @Test
  public void testBooleanQuery() throws Exception {
    TermQuery aQuery = new TermQuery(new Term("f", "a"));
    TermQuery dQuery = new TermQuery(new Term("f", "d"));
    TermQuery cQuery = new TermQuery(new Term("f", "c"));
    TermQuery yQuery = new TermQuery(new Term("f", "y"));

    BooleanQuery query = new BooleanQuery();
    BooleanQuery inner = new BooleanQuery();

    inner.add(cQuery, Occur.SHOULD);
    inner.add(yQuery, Occur.MUST_NOT);
    query.add(inner, Occur.MUST);
    query.add(aQuery, Occur.MUST);
    query.add(dQuery, Occur.MUST);
    EnumSet<Occur>[] occurList = new EnumSet[] {EnumSet.of(Occur.MUST), EnumSet.of(Occur.MUST, Occur.SHOULD)};
    for (EnumSet<Occur> occur : occurList) {
      CountingCollector c = new CountingCollector(TopScoreDocCollector.create(
          10, true), occur);
      s.search(query, null, c);
      final int maxDocs = s.maxDoc();
      assertEquals(maxDocs, c.docCounts.size());
      boolean includeOptional = occur.contains(Occur.SHOULD);
      for (int i = 0; i < maxDocs; i++) {
        Map<Query, Float> doc0 = c.docCounts.get(i);
        assertEquals(includeOptional ? 5 : 4, doc0.size());
        assertEquals(1.0F, doc0.get(aQuery), FLOAT_TOLERANCE);
        assertEquals(4.0F, doc0.get(dQuery), FLOAT_TOLERANCE);
        if (includeOptional)
          assertEquals(3.0F, doc0.get(cQuery), FLOAT_TOLERANCE);

        Map<Query, Float> doc1 = c.docCounts.get(++i);
        assertEquals(includeOptional ? 5 : 4, doc1.size());
        assertEquals(1.0F, doc1.get(aQuery), FLOAT_TOLERANCE);
        assertEquals(1.0F, doc1.get(dQuery), FLOAT_TOLERANCE);
        if (includeOptional)
          assertEquals(1.0F, doc1.get(cQuery), FLOAT_TOLERANCE);
      }
    }
  }

  @Test
  public void testPhraseQuery() throws Exception {
    PhraseQuery q = new PhraseQuery();
    q.add(new Term("f", "b"));
    q.add(new Term("f", "c"));
    CountingCollector c = new CountingCollector(TopScoreDocCollector.create(10,
        true));
    s.search(q, null, c);
    final int maxDocs = s.maxDoc();
    assertEquals(maxDocs, c.docCounts.size());
    for (int i = 0; i < maxDocs; i++) {
      Map<Query, Float> doc0 = c.docCounts.get(i);
      assertEquals(1, doc0.size());
      assertEquals(2.0F, doc0.get(q), FLOAT_TOLERANCE);

      Map<Query, Float> doc1 = c.docCounts.get(++i);
      assertEquals(1, doc1.size());
      assertEquals(1.0F, doc1.get(q), FLOAT_TOLERANCE);
    }
  }
}