LUCENE-2590: Enable access to the freq information in a Query's sub-scorers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@991310 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2010-08-31 19:50:12 +00:00
parent d97f1f25d6
commit 5fe8094027
19 changed files with 443 additions and 50 deletions

View File

@ -208,6 +208,11 @@ New features
* LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike * LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike
McCandless) McCandless)
* LUCENE-2590: Added Scorer.visitScorers (with the overridable
Scorer.visitSubScorers hook) and Scorer.freq. Combined with a custom
Collector, these experimental methods make it possible to gather the
hit count per sub-clause and per document while a search is running.
(Simon Willnauer, Mike McCandless)
Optimizations Optimizations
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching. * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.

View File

@ -320,7 +320,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
// Check if we can return a BooleanScorer // Check if we can return a BooleanScorer
if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) { if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited, maxCoord); return new BooleanScorer(this, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
} }
if (required.size() == 0 && optional.size() == 0) { if (required.size() == 0 && optional.size() == 0) {
@ -334,7 +334,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
} }
// Return a BooleanScorer2 // Return a BooleanScorer2
return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional, maxCoord); return new BooleanScorer2(this, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
} }
@Override @Override

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause.Occur;
/* Description from Doug Cutting (excerpted from /* Description from Doug Cutting (excerpted from
* LUCENE-1483): * LUCENE-1483):
@ -115,6 +116,7 @@ final class BooleanScorer extends Scorer {
float score; float score;
int doc = NO_MORE_DOCS; int doc = NO_MORE_DOCS;
int freq;
public BucketScorer() { super(null); } public BucketScorer() { super(null); }
@ -124,6 +126,9 @@ final class BooleanScorer extends Scorer {
@Override @Override
public int docID() { return doc; } public int docID() { return doc; }
/**
 * Returns the value currently held in this bucket scorer's {@code freq}
 * field.
 * NOTE(review): the collection loop visible in this change assigns
 * {@code current.coord} to {@code bs.freq} before calling collect;
 * presumably {@code bs} is this bucket scorer - confirm.
 */
@Override
public float freq() {
  return this.freq;
}
@Override @Override
public int nextDoc() throws IOException { return NO_MORE_DOCS; } public int nextDoc() throws IOException { return NO_MORE_DOCS; }
@ -159,7 +164,8 @@ final class BooleanScorer extends Scorer {
static final class SubScorer { static final class SubScorer {
public Scorer scorer; public Scorer scorer;
public boolean required = false; // TODO: re-enable this if BQ ever sends us required clauses
//public boolean required = false;
public boolean prohibited = false; public boolean prohibited = false;
public Collector collector; public Collector collector;
public SubScorer next; public SubScorer next;
@ -167,8 +173,12 @@ final class BooleanScorer extends Scorer {
public SubScorer(Scorer scorer, boolean required, boolean prohibited, public SubScorer(Scorer scorer, boolean required, boolean prohibited,
Collector collector, SubScorer next) Collector collector, SubScorer next)
throws IOException { throws IOException {
if (required) {
throw new IllegalArgumentException("this scorer cannot handle required=true");
}
this.scorer = scorer; this.scorer = scorer;
this.required = required; // TODO: re-enable this if BQ ever sends us required clauses
//this.required = required;
this.prohibited = prohibited; this.prohibited = prohibited;
this.collector = collector; this.collector = collector;
this.next = next; this.next = next;
@ -178,7 +188,8 @@ final class BooleanScorer extends Scorer {
private SubScorer scorers = null; private SubScorer scorers = null;
private BucketTable bucketTable = new BucketTable(); private BucketTable bucketTable = new BucketTable();
private final float[] coordFactors; private final float[] coordFactors;
private int requiredMask = 0; // TODO: re-enable this if BQ ever sends us required clauses
//private int requiredMask = 0;
private int prohibitedMask = 0; private int prohibitedMask = 0;
private int nextMask = 1; private int nextMask = 1;
private final int minNrShouldMatch; private final int minNrShouldMatch;
@ -186,9 +197,9 @@ final class BooleanScorer extends Scorer {
private Bucket current; private Bucket current;
private int doc = -1; private int doc = -1;
BooleanScorer(Similarity similarity, int minNrShouldMatch, BooleanScorer(Weight weight, Similarity similarity, int minNrShouldMatch,
List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException { List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
super(similarity); super(similarity, weight);
this.minNrShouldMatch = minNrShouldMatch; this.minNrShouldMatch = minNrShouldMatch;
if (optionalScorers != null && optionalScorers.size() > 0) { if (optionalScorers != null && optionalScorers.size() > 0) {
@ -231,8 +242,11 @@ final class BooleanScorer extends Scorer {
while (current != null) { // more queued while (current != null) { // more queued
// check prohibited & required // check prohibited & required
if ((current.bits & prohibitedMask) == 0 && if ((current.bits & prohibitedMask) == 0) {
(current.bits & requiredMask) == requiredMask) {
// TODO: re-enable this if BQ ever sends us required
// clauses
//&& (current.bits & requiredMask) == requiredMask) {
if (current.doc >= max){ if (current.doc >= max){
tmp = current; tmp = current;
@ -245,6 +259,7 @@ final class BooleanScorer extends Scorer {
if (current.coord >= minNrShouldMatch) { if (current.coord >= minNrShouldMatch) {
bs.score = current.score * coordFactors[current.coord]; bs.score = current.score * coordFactors[current.coord];
bs.doc = current.doc; bs.doc = current.doc;
bs.freq = current.coord;
collector.collect(current.doc); collector.collect(current.doc);
} }
} }
@ -294,8 +309,9 @@ final class BooleanScorer extends Scorer {
// check prohibited & required, and minNrShouldMatch // check prohibited & required, and minNrShouldMatch
if ((current.bits & prohibitedMask) == 0 && if ((current.bits & prohibitedMask) == 0 &&
(current.bits & requiredMask) == requiredMask &&
current.coord >= minNrShouldMatch) { current.coord >= minNrShouldMatch) {
// TODO: re-enable this if BQ ever sends us required clauses
// (current.bits & requiredMask) == requiredMask &&
return doc = current.doc; return doc = current.doc;
} }
} }
@ -340,4 +356,27 @@ final class BooleanScorer extends Scorer {
return buffer.toString(); return buffer.toString();
} }
/**
 * Reports this scorer to the visitor (via the superclass implementation)
 * and then descends into every entry of the {@code scorers} linked list,
 * passing this scorer's query as the parent of each child.
 *
 * @param parent the query enclosing this scorer's query, forwarded unchanged
 *        to the superclass
 * @param relationship how this scorer relates to {@code parent}; only used
 *        for the superclass call, children get their own relationship
 * @param visitor callback receiving this scorer and all of its sub-scorers
 */
@Override
protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
  super.visitSubScorers(parent, relationship, visitor);
  final Query ownQuery = weight.getQuery();
  for (SubScorer node = scorers; node != null; node = node.next) {
    // TODO: re-enable this if BQ ever sends us required
    // clauses
    //if (node.required) {
    //  childRelationship = Occur.MUST;
    //
    // TODO: maybe it's pointless to report prohibited clauses, but it is
    // possible the doc may still be collected, eg foo OR (bar -fee)
    final Occur childRelationship = node.prohibited ? Occur.MUST_NOT : Occur.SHOULD;
    node.scorer.visitSubScorers(ownQuery, childRelationship, visitor);
  }
}
} }

View File

@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.lucene.search.BooleanClause.Occur;
/* See the description in BooleanScorer.java, comparing /* See the description in BooleanScorer.java, comparing
* BooleanScorer & BooleanScorer2 */ * BooleanScorer & BooleanScorer2 */
@ -80,9 +82,9 @@ class BooleanScorer2 extends Scorer {
* @param optional * @param optional
* the list of optional scorers. * the list of optional scorers.
*/ */
public BooleanScorer2(Similarity similarity, int minNrShouldMatch, public BooleanScorer2(Weight weight, Similarity similarity, int minNrShouldMatch,
List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException { List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException {
super(similarity); super(similarity, weight);
if (minNrShouldMatch < 0) { if (minNrShouldMatch < 0) {
throw new IllegalArgumentException("Minimum number of optional scorers should not be negative"); throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
} }
@ -301,10 +303,28 @@ class BooleanScorer2 extends Scorer {
return sum * coordinator.coordFactors[coordinator.nrMatchers]; return sum * coordinator.coordFactors[coordinator.nrMatchers];
} }
/**
 * Returns the coordinator's {@code nrMatchers} counter, widened to a float.
 * This is the same counter that score() uses to pick the coord factor.
 */
@Override
public float freq() {
  final int matchers = coordinator.nrMatchers;
  return matchers;
}
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
return doc = countingSumScorer.advance(target); return doc = countingSumScorer.advance(target);
} }
/**
 * Reports this scorer to the visitor (via the superclass implementation)
 * and then descends into each clause scorer, using this scorer's query as
 * the parent. Clause scorers are visited in the order optional, prohibited,
 * required.
 *
 * @param parent the query enclosing this scorer's query, forwarded unchanged
 *        to the superclass
 * @param relationship how this scorer relates to {@code parent}; only used
 *        for the superclass call
 * @param visitor callback receiving this scorer and all of its sub-scorers
 */
@Override
protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
  super.visitSubScorers(parent, relationship, visitor);
  // Every clause scorer is reported with this scorer's query as its parent.
  final Query ownQuery = weight.getQuery();
  for (Scorer optionalScorer : optionalScorers) {
    optionalScorer.visitSubScorers(ownQuery, Occur.SHOULD, visitor);
  }
  for (Scorer prohibitedScorer : prohibitedScorers) {
    prohibitedScorer.visitSubScorers(ownQuery, Occur.MUST_NOT, visitor);
  }
  for (Scorer requiredScorer : requiredScorers) {
    requiredScorer.visitSubScorers(ownQuery, Occur.MUST, visitor);
  }
}
} }

View File

@ -123,7 +123,7 @@ public class ConstantScoreQuery extends Query {
int doc = -1; int doc = -1;
public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException { public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException {
super(similarity); super(similarity,w);
theScore = w.getValue(); theScore = w.getValue();
DocIdSet docIdSet = filter.getDocIdSet(reader); DocIdSet docIdSet = filter.getDocIdSet(reader);
if (docIdSet == null) { if (docIdSet == null) {

View File

@ -21,9 +21,9 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.index.*; import org.apache.lucene.index.*;
import org.apache.lucene.search.BooleanClause.Occur;
final class ExactPhraseScorer extends Scorer { final class ExactPhraseScorer extends Scorer {
private final Weight weight;
private final byte[] norms; private final byte[] norms;
private final float value; private final float value;
@ -63,8 +63,7 @@ final class ExactPhraseScorer extends Scorer {
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity similarity, byte[] norms) throws IOException { Similarity similarity, byte[] norms) throws IOException {
super(similarity); super(similarity, weight);
this.weight = weight;
this.norms = norms; this.norms = norms;
this.value = weight.getValue(); this.value = weight.getValue();
@ -193,8 +192,8 @@ final class ExactPhraseScorer extends Scorer {
return "ExactPhraseScorer(" + weight + ")"; return "ExactPhraseScorer(" + weight + ")";
} }
// used by MultiPhraseQuery @Override
float currentFreq() { public float freq() {
return freq; return freq;
} }

View File

@ -126,7 +126,7 @@ extends Query {
return null; return null;
} }
return new Scorer(similarity) { return new Scorer(similarity, this) {
private int doc = -1; private int doc = -1;

View File

@ -54,7 +54,7 @@ public class MatchAllDocsQuery extends Query {
MatchAllScorer(IndexReader reader, Similarity similarity, Weight w, MatchAllScorer(IndexReader reader, Similarity similarity, Weight w,
byte[] norms) throws IOException { byte[] norms) throws IOException {
super(similarity); super(similarity,w);
delDocs = MultiFields.getDeletedDocs(reader); delDocs = MultiFields.getDeletedDocs(reader);
score = w.getValue(); score = w.getValue();
maxDoc = reader.maxDoc(); maxDoc = reader.maxDoc();

View File

@ -271,11 +271,7 @@ public class MultiPhraseQuery extends Query {
int d = scorer.advance(doc); int d = scorer.advance(doc);
float phraseFreq; float phraseFreq;
if (d == doc) { if (d == doc) {
if (slop == 0) { phraseFreq = scorer.freq();
phraseFreq = ((ExactPhraseScorer) scorer).currentFreq();
} else {
phraseFreq = ((SloppyPhraseScorer) scorer).currentFreq();
}
} else { } else {
phraseFreq = 0.0f; phraseFreq = 0.0f;
} }

View File

@ -275,11 +275,7 @@ public class PhraseQuery extends Query {
int d = scorer.advance(doc); int d = scorer.advance(doc);
float phraseFreq; float phraseFreq;
if (d == doc) { if (d == doc) {
if (slop == 0) { phraseFreq = scorer.freq();
phraseFreq = ((ExactPhraseScorer) scorer).currentFreq();
} else {
phraseFreq = ((SloppyPhraseScorer) scorer).currentFreq();
}
} else { } else {
phraseFreq = 0.0f; phraseFreq = 0.0f;
} }

View File

@ -19,6 +19,8 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.search.BooleanClause.Occur;
/** Expert: Scoring functionality for phrase queries. /** Expert: Scoring functionality for phrase queries.
* <br>A document is considered matching if it contains the phrase-query terms * <br>A document is considered matching if it contains the phrase-query terms
* at "valid" positions. What "valid positions" are * at "valid" positions. What "valid positions" are
@ -30,7 +32,6 @@ import java.io.IOException;
* means a match. * means a match.
*/ */
abstract class PhraseScorer extends Scorer { abstract class PhraseScorer extends Scorer {
private Weight weight;
protected byte[] norms; protected byte[] norms;
protected float value; protected float value;
@ -43,9 +44,8 @@ abstract class PhraseScorer extends Scorer {
PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity similarity, byte[] norms) { Similarity similarity, byte[] norms) {
super(similarity); super(similarity, weight);
this.norms = norms; this.norms = norms;
this.weight = weight;
this.value = weight.getValue(); this.value = weight.getValue();
// convert tps to a list of phrase positions. // convert tps to a list of phrase positions.
@ -129,7 +129,10 @@ abstract class PhraseScorer extends Scorer {
/** /**
* phrase frequency in current doc as computed by phraseFreq(). * phrase frequency in current doc as computed by phraseFreq().
*/ */
public final float currentFreq() { return freq; } @Override
public final float freq() {
return freq;
}
/** /**
* For a document containing all the phrase query terms, compute the * For a document containing all the phrase query terms, compute the

View File

@ -32,7 +32,7 @@ import java.io.IOException;
*/ */
public class ScoreCachingWrappingScorer extends Scorer { public class ScoreCachingWrappingScorer extends Scorer {
private Scorer scorer; private final Scorer scorer;
private int curDoc = -1; private int curDoc = -1;
private float curScore; private float curScore;

View File

@ -19,6 +19,8 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.search.BooleanClause.Occur;
/** /**
* Expert: Common scoring functionality for different types of queries. * Expert: Common scoring functionality for different types of queries.
* *
@ -39,12 +41,23 @@ import java.io.IOException;
*/ */
public abstract class Scorer extends DocIdSetIterator { public abstract class Scorer extends DocIdSetIterator {
private final Similarity similarity; private final Similarity similarity;
protected final Weight weight;
/** Constructs a Scorer. /** Constructs a Scorer.
* @param similarity The <code>Similarity</code> implementation used by this scorer. * @param similarity The <code>Similarity</code> implementation used by this scorer.
*/ */
protected Scorer(Similarity similarity) { protected Scorer(Similarity similarity) {
this(similarity, null);
}
/**
* Constructs a Scorer
* @param similarity The <code>Similarity</code> implementation used by this scorer.
* @param weight The scorers <code>Weight</code>
*/
protected Scorer(Similarity similarity, Weight weight) {
this.similarity = similarity; this.similarity = similarity;
this.weight = weight;
} }
/** Returns the Similarity implementation used by this scorer. */ /** Returns the Similarity implementation used by this scorer. */
@ -94,4 +107,92 @@ public abstract class Scorer extends DocIdSetIterator {
*/ */
public abstract float score() throws IOException; public abstract float score() throws IOException;
/**
 * Returns the number of matches for the current document.
 * <p>
 * The result is a float (not an int) because SloppyPhraseScorer discounts
 * its freq according to how "sloppy" the match was.
 * <p>
 * This base implementation does not compute anything and always throws;
 * scorers that can report a frequency override it.
 *
 * @return the number of matches for the current document
 * @throws UnsupportedOperationException always, in this base implementation
 * @lucene.experimental
 */
public float freq() throws IOException {
  final String message = this + " does not implement freq()";
  throw new UnsupportedOperationException(message);
}
/**
 * A callback for gathering information from a scorer and its sub-scorers.
 * The top-level scorer and each of its sub-scorers are handed to exactly one
 * of the visit methods, chosen by their boolean relationship in the query.
 * Every visit method is a no-op by default, so implementations override only
 * the relationships they care about.
 *
 * @param <P> type of the parent query
 * @param <C> type of the child query
 * @param <S> type of the visited scorer
 * @lucene.experimental
 */
public static abstract class ScorerVisitor<P extends Query, C extends Query, S extends Scorer> {

  /**
   * Invoked for every optional scorer.
   *
   * @param parent the parent query of the child query, or <code>null</code>
   *        if the child is a top-level query
   * @param child the query of the currently visited scorer
   * @param scorer the currently visited scorer
   */
  public void visitOptional(P parent, C child, S scorer) {
  }

  /**
   * Invoked for every required scorer.
   *
   * @param parent the parent query of the child query, or <code>null</code>
   *        if the child is a top-level query
   * @param child the query of the currently visited scorer
   * @param scorer the currently visited scorer
   */
  public void visitRequired(P parent, C child, S scorer) {
  }

  /**
   * Invoked for every prohibited scorer.
   *
   * @param parent the parent query of the child query, or <code>null</code>
   *        if the child is a top-level query
   * @param child the query of the currently visited scorer
   * @param scorer the currently visited scorer
   */
  public void visitProhibited(P parent, C child, S scorer) {
  }
}
/**
 * Expert: gathers details for this scorer and all of its sub-scorers.
 * Used together with a custom {@link Collector}, this makes it possible to
 * find out how each sub-query matched the current hit.
 * <p>
 * This scorer itself is reported with a <code>null</code> parent and as a
 * required clause.
 *
 * @param visitor a callback executed for this scorer and each sub-scorer
 * @lucene.experimental
 */
public void visitScorers(ScorerVisitor<Query, Query, Scorer> visitor) {
  // A top-level scorer has no enclosing query; MUST is the default relationship.
  final Query noParent = null;
  visitSubScorers(noParent, Occur.MUST, visitor);
}
/**
 * Reports this scorer to the visit method of <code>visitor</code> that
 * corresponds to <code>relationship</code>.
 * <p>
 * {@link Scorer} subclasses that contain multiple scorers should override
 * this method (calling the super implementation first) so that details of
 * their sub-scorers can be gathered via {@link ScorerVisitor}.
 * </p>
 * <p>
 * Note: this method throws {@link UnsupportedOperationException} if no
 * associated {@link Weight} instance was provided to
 * {@link #Scorer(Similarity, Weight)}, because the query reported to the
 * visitor is obtained from that weight.
 * </p>
 *
 * @param parent the query enclosing this scorer's query, or
 *        <code>null</code> for a top-level scorer
 * @param relationship the boolean relationship between this scorer's query
 *        and <code>parent</code>
 * @param visitor the callback to notify
 * @throws UnsupportedOperationException if this scorer has no weight
 * @lucene.experimental
 */
protected void visitSubScorers(Query parent, Occur relationship,
    ScorerVisitor<Query, Query, Scorer> visitor) {
  if (weight == null) {
    // Name the class rather than calling toString(): subclass toString()
    // implementations are not known to be safe when the weight is missing.
    throw new UnsupportedOperationException(getClass().getName()
        + " was created without a Weight and cannot be visited");
  }
  final Query q = weight.getQuery();
  switch (relationship) {
    case MUST:
      visitor.visitRequired(parent, q, this);
      break;
    case MUST_NOT:
      visitor.visitProhibited(parent, q, this);
      break;
    case SHOULD:
      visitor.visitOptional(parent, q, this);
      break;
  }
}
} }

View File

@ -20,11 +20,11 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>. /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
*/ */
final class TermScorer extends Scorer { final class TermScorer extends Scorer {
private Weight weight;
private DocsEnum docsEnum; private DocsEnum docsEnum;
private byte[] norms; private byte[] norms;
private float weightValue; private float weightValue;
@ -54,9 +54,8 @@ final class TermScorer extends Scorer {
* The field norms of the document fields for the <code>Term</code>. * The field norms of the document fields for the <code>Term</code>.
*/ */
TermScorer(Weight weight, DocsEnum td, Similarity similarity, byte[] norms) { TermScorer(Weight weight, DocsEnum td, Similarity similarity, byte[] norms) {
super(similarity); super(similarity, weight);
this.weight = weight;
this.docsEnum = td; this.docsEnum = td;
this.norms = norms; this.norms = norms;
this.weightValue = weight.getValue(); this.weightValue = weight.getValue();
@ -103,6 +102,11 @@ final class TermScorer extends Scorer {
return doc; return doc;
} }
/**
 * Returns the current value of this scorer's {@code freq} field.
 * NOTE(review): presumably the term's frequency in the current document;
 * the code that maintains the field is outside this view - confirm.
 */
@Override
public float freq() {
  return this.freq;
}
/** /**
* Advances to the next document matching the query. <br> * Advances to the next document matching the query. <br>
* The iterator over the matching documents is buffered using * The iterator over the matching documents is buffered using
@ -172,4 +176,5 @@ final class TermScorer extends Scorer {
/** Returns a string representation of this <code>TermScorer</code>. */ /** Returns a string representation of this <code>TermScorer</code>. */
@Override @Override
public String toString() { return "scorer(" + weight + ")"; } public String toString() { return "scorer(" + weight + ")"; }
} }

View File

@ -300,7 +300,7 @@ public class CustomScoreQuery extends Query {
// constructor // constructor
private CustomScorer(Similarity similarity, IndexReader reader, CustomWeight w, private CustomScorer(Similarity similarity, IndexReader reader, CustomWeight w,
Scorer subQueryScorer, Scorer[] valSrcScorers) throws IOException { Scorer subQueryScorer, Scorer[] valSrcScorers) throws IOException {
super(similarity); super(similarity,w);
this.qWeight = w.getValue(); this.qWeight = w.getValue();
this.subQueryScorer = subQueryScorer; this.subQueryScorer = subQueryScorer;
this.valSrcScorers = valSrcScorers; this.valSrcScorers = valSrcScorers;

View File

@ -134,7 +134,7 @@ public class ValueSourceQuery extends Query {
// constructor // constructor
private ValueSourceScorer(Similarity similarity, IndexReader reader, ValueSourceWeight w) throws IOException { private ValueSourceScorer(Similarity similarity, IndexReader reader, ValueSourceWeight w) throws IOException {
super(similarity); super(similarity,w);
qWeight = w.getValue(); qWeight = w.getValue();
// this is when/where the values are first created. // this is when/where the values are first created.
vals = valSrc.getValues(reader); vals = valSrc.getValues(reader);

View File

@ -29,7 +29,6 @@ import org.apache.lucene.search.Similarity;
*/ */
public class SpanScorer extends Scorer { public class SpanScorer extends Scorer {
protected Spans spans; protected Spans spans;
protected Weight weight;
protected byte[] norms; protected byte[] norms;
protected float value; protected float value;
@ -40,10 +39,9 @@ public class SpanScorer extends Scorer {
protected SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms) protected SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
throws IOException { throws IOException {
super(similarity); super(similarity, weight);
this.spans = spans; this.spans = spans;
this.norms = norms; this.norms = norms;
this.weight = weight;
this.value = weight.getValue(); this.value = weight.getValue();
if (this.spans.next()) { if (this.spans.next()) {
doc = -1; doc = -1;
@ -98,6 +96,11 @@ public class SpanScorer extends Scorer {
return norms == null? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize return norms == null? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize
} }
/**
 * Returns the current value of this scorer's {@code freq} field.
 * NOTE(review): presumably the span frequency accumulated for the current
 * document; the code that maintains the field is outside this view - confirm.
 */
@Override
public float freq() throws IOException {
  return this.freq;
}
/** This method is no longer an official member of {@link Scorer}, /** This method is no longer an official member of {@link Scorer},
* but it is needed by SpanWeight to build an explanation. */ * but it is needed by SpanWeight to build an explanation. */
protected Explanation explain(final int doc) throws IOException { protected Explanation explain(final int doc) throws IOException {

View File

@ -90,7 +90,7 @@ public class TestBooleanScorer extends LuceneTestCase
} }
}}; }};
BooleanScorer bs = new BooleanScorer(sim, 1, Arrays.asList(scorers), null, scorers.length); BooleanScorer bs = new BooleanScorer(null, sim, 1, Arrays.asList(scorers), null, scorers.length);
assertEquals("should have received 3000", 3000, bs.nextDoc()); assertEquals("should have received 3000", 3000, bs.nextDoc());
assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc()); assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc());

View File

@ -0,0 +1,226 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.util.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Scorer.ScorerVisitor;
import org.apache.lucene.store.*;
import java.util.*;
import java.io.*;
import org.junit.Test;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import static org.junit.Assert.*;
/**
 * Checks that per-document frequencies of a query's sub-scorers can be
 * gathered while a search is running, by combining
 * {@link Scorer#visitScorers} with a wrapping {@link Collector}.
 * <p>
 * The index is built from alternating documents: a "long" one with the text
 * <code>a b c d b c d c d d</code> followed by a "short" one with
 * <code>a b c d</code>. The tests expect even doc ids to be long documents
 * and odd doc ids to be short documents.
 */
public class TestSubScorerFreqs extends LuceneTestCaseJ4 {

  private static Directory dir;
  private static IndexSearcher s;

  /** Builds the shared index of alternating long/short documents. */
  @BeforeClass
  public static void makeIndex() throws Exception {
    dir = new RAMDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        newStaticRandom(TestSubScorerFreqs.class), dir);
    // make sure we have more than one segment occasionally
    for (int i = 0; i < 31 * RANDOM_MULTIPLIER; i++) {
      Document doc = new Document();
      doc.add(new Field("f", "a b c d b c d c d d", Field.Store.NO,
          Field.Index.ANALYZED));
      w.addDocument(doc);

      doc = new Document();
      doc.add(new Field("f", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
      w.addDocument(doc);
    }

    s = new IndexSearcher(w.getReader());
    w.close();
  }

  /**
   * Closes the shared reader, searcher and directory, then drops the static
   * references so the closed index does not stay reachable after this class
   * has finished.
   */
  @AfterClass
  public static void finish() throws Exception {
    try {
      s.getIndexReader().close();
      s.close();
      dir.close();
    } finally {
      s = null;
      dir = null;
    }
  }

  /**
   * Collector wrapper that records, for every collected document, the freq
   * of each visited (sub-)scorer whose relationship is contained in
   * {@link #collect}. All calls are forwarded to the wrapped collector.
   */
  private static class CountingCollector extends Collector {
    private final Collector other;
    private int docBase;

    /** Maps the global doc id (segment doc id + docBase) to the freq recorded per query. */
    public final Map<Integer, Map<Query, Float>> docCounts = new HashMap<Integer, Map<Query, Float>>();

    /** Scorers registered by the visitor, keyed by their query. */
    private final Map<Query, Scorer> subScorers = new HashMap<Query, Scorer>();
    private final ScorerVisitor<Query, Query, Scorer> visitor = new MockScorerVisitor();
    private final EnumSet<Occur> collect;

    /** Registers each visited scorer whose relationship was requested. */
    private class MockScorerVisitor extends ScorerVisitor<Query, Query, Scorer> {

      @Override
      public void visitOptional(Query parent, Query child, Scorer scorer) {
        if (collect.contains(Occur.SHOULD)) {
          subScorers.put(child, scorer);
        }
      }

      @Override
      public void visitProhibited(Query parent, Query child, Scorer scorer) {
        if (collect.contains(Occur.MUST_NOT)) {
          subScorers.put(child, scorer);
        }
      }

      @Override
      public void visitRequired(Query parent, Query child, Scorer scorer) {
        if (collect.contains(Occur.MUST)) {
          subScorers.put(child, scorer);
        }
      }
    }

    /** Records scorers of every relationship. */
    public CountingCollector(Collector other) {
      this(other, EnumSet.allOf(Occur.class));
    }

    /**
     * @param other the collector all calls are forwarded to
     * @param collect the relationships whose scorers should be recorded
     */
    public CountingCollector(Collector other, EnumSet<Occur> collect) {
      this.other = other;
      this.collect = collect;
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      other.setScorer(scorer);
      scorer.visitScorers(visitor);
    }

    @Override
    public void collect(int doc) throws IOException {
      final Map<Query, Float> freqs = new HashMap<Query, Float>();
      for (Map.Entry<Query, Scorer> ent : subScorers.entrySet()) {
        Scorer value = ent.getValue();
        int matchId = value.docID();
        // A sub-scorer positioned on another document did not match this hit.
        freqs.put(ent.getKey(), matchId == doc ? value.freq() : 0.0f);
      }
      docCounts.put(doc + docBase, freqs);
      other.collect(doc);
    }

    @Override
    public void setNextReader(IndexReader reader, int docBase)
        throws IOException {
      this.docBase = docBase;
      other.setNextReader(reader, docBase);
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
      return other.acceptsDocsOutOfOrder();
    }
  }

  private static final float FLOAT_TOLERANCE = 0.00001F;

  /**
   * Runs <code>q</code> and asserts that it is the only recorded query for
   * every document, with the given freq on long (even id) and short (odd id)
   * documents.
   */
  private static void assertSingleQueryFreqs(Query q, float longDocFreq,
      float shortDocFreq) throws Exception {
    CountingCollector c = new CountingCollector(TopScoreDocCollector.create(10,
        true));
    s.search(q, null, c);
    final int maxDocs = s.maxDoc();
    assertEquals(maxDocs, c.docCounts.size());
    // Documents were added in long/short pairs, so step through them two at a time.
    for (int i = 0; i < maxDocs; i += 2) {
      Map<Query, Float> doc0 = c.docCounts.get(i);
      assertEquals(1, doc0.size());
      assertEquals(longDocFreq, doc0.get(q), FLOAT_TOLERANCE);

      Map<Query, Float> doc1 = c.docCounts.get(i + 1);
      assertEquals(1, doc1.size());
      assertEquals(shortDocFreq, doc1.get(q), FLOAT_TOLERANCE);
    }
  }

  @Test
  public void testTermQuery() throws Exception {
    // "d" occurs four times in the long document and once in the short one.
    assertSingleQueryFreqs(new TermQuery(new Term("f", "d")), 4.0F, 1.0F);
  }

  @Test
  public void testBooleanQuery() throws Exception {
    TermQuery aQuery = new TermQuery(new Term("f", "a"));
    TermQuery dQuery = new TermQuery(new Term("f", "d"));
    TermQuery cQuery = new TermQuery(new Term("f", "c"));
    TermQuery yQuery = new TermQuery(new Term("f", "y"));

    BooleanQuery query = new BooleanQuery();
    BooleanQuery inner = new BooleanQuery();

    inner.add(cQuery, Occur.SHOULD);
    inner.add(yQuery, Occur.MUST_NOT);
    query.add(inner, Occur.MUST);
    query.add(aQuery, Occur.MUST);
    query.add(dQuery, Occur.MUST);

    // Generic array creation is unchecked; every element is an EnumSet<Occur>.
    @SuppressWarnings("unchecked")
    EnumSet<Occur>[] occurList = new EnumSet[] {EnumSet.of(Occur.MUST), EnumSet.of(Occur.MUST, Occur.SHOULD)};
    for (EnumSet<Occur> occur : occurList) {
      CountingCollector c = new CountingCollector(TopScoreDocCollector.create(
          10, true), occur);
      s.search(query, null, c);
      final int maxDocs = s.maxDoc();
      assertEquals(maxDocs, c.docCounts.size());
      boolean includeOptional = occur.contains(Occur.SHOULD);
      // Four queries are expected when only MUST is recorded; recording
      // SHOULD as well adds the optional cQuery.
      final int expectedQueries = includeOptional ? 5 : 4;
      for (int i = 0; i < maxDocs; i += 2) {
        Map<Query, Float> doc0 = c.docCounts.get(i);
        assertEquals(expectedQueries, doc0.size());
        assertEquals(1.0F, doc0.get(aQuery), FLOAT_TOLERANCE);
        assertEquals(4.0F, doc0.get(dQuery), FLOAT_TOLERANCE);
        if (includeOptional) {
          assertEquals(3.0F, doc0.get(cQuery), FLOAT_TOLERANCE);
        }

        Map<Query, Float> doc1 = c.docCounts.get(i + 1);
        assertEquals(expectedQueries, doc1.size());
        assertEquals(1.0F, doc1.get(aQuery), FLOAT_TOLERANCE);
        assertEquals(1.0F, doc1.get(dQuery), FLOAT_TOLERANCE);
        if (includeOptional) {
          assertEquals(1.0F, doc1.get(cQuery), FLOAT_TOLERANCE);
        }
      }
    }
  }

  @Test
  public void testPhraseQuery() throws Exception {
    PhraseQuery q = new PhraseQuery();
    q.add(new Term("f", "b"));
    q.add(new Term("f", "c"));
    // The phrase "b c" occurs twice in the long document and once in the short one.
    assertSingleQueryFreqs(q, 2.0F, 1.0F);
  }
}