LUCENE-7628: Scorer.getChildren() returns only matching Scorers

This commit is contained in:
Alan Woodward 2017-01-16 14:45:45 +00:00
parent c7fdd10971
commit 5bdc492c9c
9 changed files with 82 additions and 33 deletions

View File

@ -77,6 +77,11 @@ API Changes
* LUCENE-7643: Replaced doc-values queries in lucene/sandbox with factory
methods on the *DocValuesField classes. (Adrien Grand)
* LUCENE-7628: Scorer.getChildren() now only returns Scorers that are
positioned on the current document, and can throw an IOException.
AssertingScorer checks that getChildren() is not called on an unpositioned
Scorer. (Alan Woodward, Adrien Grand)
New Features
* LUCENE-7623: Add FunctionScoreQuery and FunctionMatchQuery (Alan Woodward,

View File

@ -194,9 +194,9 @@ abstract class DisjunctionScorer extends Scorer {
protected abstract float score(DisiWrapper topList) throws IOException;
@Override
public final Collection<ChildScorer> getChildren() {
public final Collection<ChildScorer> getChildren() throws IOException {
ArrayList<ChildScorer> children = new ArrayList<>();
for (DisiWrapper scorer : subScorers) {
for (DisiWrapper scorer = getSubMatches(); scorer != null; scorer = scorer.next) {
children.add(new ChildScorer(scorer.scorer, "SHOULD"));
}
return children;

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.LongStream;
import java.util.stream.StreamSupport;
@ -90,7 +89,6 @@ final class MinShouldMatchSumScorer extends Scorer {
final DisiWrapper[] tail;
int tailSize;
final Collection<ChildScorer> childScorers;
final long cost;
MinShouldMatchSumScorer(Weight weight, Collection<Scorer> scorers, int minShouldMatch) {
@ -115,17 +113,17 @@ final class MinShouldMatchSumScorer extends Scorer {
addLead(new DisiWrapper(scorer));
}
List<ChildScorer> children = new ArrayList<>();
for (Scorer scorer : scorers) {
children.add(new ChildScorer(scorer, "SHOULD"));
}
this.childScorers = Collections.unmodifiableCollection(children);
this.cost = cost(scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost), scorers.size(), minShouldMatch);
}
@Override
public final Collection<ChildScorer> getChildren() {
return childScorers;
public final Collection<ChildScorer> getChildren() throws IOException {
List<ChildScorer> matchingScorers = new ArrayList<>();
updateFreq();
for (DisiWrapper s = lead; s != null; s = s.next) {
matchingScorers.add(new ChildScorer(s.scorer, "SHOULD"));
}
return matchingScorers;
}
@Override

View File

@ -77,9 +77,17 @@ public abstract class Scorer {
return weight;
}
/** Returns child sub-scorers
* @lucene.experimental */
public Collection<ChildScorer> getChildren() {
/**
* Returns child sub-scorers positioned on the current document
*
* Note that this method should not be called on Scorers passed to {@link LeafCollector#setScorer(Scorer)},
* as these may be synthetic Scorers produced by {@link BulkScorer} which will throw an Exception.
*
* This method should only be called when the Scorer is positioned
*
* @lucene.experimental
*/
public Collection<ChildScorer> getChildren() throws IOException {
return Collections.emptyList();
}

View File

@ -169,7 +169,7 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
};
}
private void fillLeaves(Scorer scorer, Set<Scorer> set) {
private void fillLeaves(Scorer scorer, Set<Scorer> set) throws IOException {
if (scorer.getWeight().getQuery() instanceof TermQuery) {
set.add(scorer);
} else {
@ -186,7 +186,40 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
public int freq(int doc) throws IOException {
return docCounts.get(doc);
}
}
public void testDisjunctionMatches() throws IOException {
BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
bq1.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
bq1.add(new PhraseQuery(F2, "search", "engine"), Occur.SHOULD);
Weight w1 = scorerSearcher.createNormalizedWeight(bq1.build(), true);
Scorer s1 = w1.scorer(reader.leaves().get(0));
assertEquals(0, s1.iterator().nextDoc());
assertEquals(2, s1.getChildren().size());
BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
bq2.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
bq2.add(new PhraseQuery(F2, "search", "library"), Occur.SHOULD);
Weight w2 = scorerSearcher.createNormalizedWeight(bq2.build(), true);
Scorer s2 = w2.scorer(reader.leaves().get(0));
assertEquals(0, s2.iterator().nextDoc());
assertEquals(1, s2.getChildren().size());
}
public void testMinShouldMatchMatches() throws IOException {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
bq.add(new TermQuery(new Term(F2, "lucene")), Occur.SHOULD);
bq.add(new PhraseQuery(F2, "search", "library"), Occur.SHOULD);
bq.setMinimumNumberShouldMatch(2);
Weight w = scorerSearcher.createNormalizedWeight(bq.build(), true);
Scorer s = w.scorer(reader.leaves().get(0));
assertEquals(0, s.iterator().nextDoc());
assertEquals(2, s.getChildren().size());
}
public void testGetChildrenMinShouldMatchSumScorer() throws IOException {
@ -203,12 +236,12 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
for (String summary : collector.getSummaries()) {
assertEquals(
"ConjunctionScorer\n" +
" MUST ConstantScoreScorer\n" +
" MUST MinShouldMatchSumScorer\n" +
" SHOULD TermScorer body:nutch\n" +
" SHOULD TermScorer body:crawler\n" +
" SHOULD TermScorer body:web",
summary);
" MUST ConstantScoreScorer\n" +
" MUST MinShouldMatchSumScorer\n" +
" SHOULD TermScorer body:web\n" +
" SHOULD TermScorer body:crawler\n" +
" SHOULD TermScorer body:nutch",
summary);
}
}
@ -261,7 +294,7 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
};
}
private static void summarizeScorer(final StringBuilder builder, final Scorer scorer, final int indent) {
private static void summarizeScorer(final StringBuilder builder, final Scorer scorer, final int indent) throws IOException {
builder.append(scorer.getClass().getSimpleName());
if (scorer instanceof TermScorer) {
TermQuery termQuery = (TermQuery) scorer.getWeight().getQuery();

View File

@ -68,7 +68,6 @@ public class TestSubScorerFreqs extends LuceneTestCase {
private static class CountingCollector extends FilterCollector {
public final Map<Integer, Map<Query, Float>> docCounts = new HashMap<>();
private final Map<Query, Scorer> subScorers = new HashMap<>();
private final Set<String> relationships;
public CountingCollector(Collector other) {
@ -79,24 +78,29 @@ public class TestSubScorerFreqs extends LuceneTestCase {
super(other);
this.relationships = relationships;
}
public void setSubScorers(Scorer scorer, String relationship) {
private Map<Query, Scorer> getSubScorers(Scorer scorer) throws IOException {
Map<Query, Scorer> collected = new HashMap<>();
for (ChildScorer child : scorer.getChildren()) {
if (scorer instanceof AssertingScorer || relationships.contains(child.relationship)) {
setSubScorers(child.child, child.relationship);
collected.put(scorer.getWeight().getQuery(), scorer);
}
collected.putAll(getSubScorers(child.child));
}
subScorers.put(scorer.getWeight().getQuery(), scorer);
return collected;
}
public LeafCollector getLeafCollector(LeafReaderContext context)
throws IOException {
final int docBase = context.docBase;
return new FilterLeafCollector(super.getLeafCollector(context)) {
Scorer scorer;
@Override
public void collect(int doc) throws IOException {
final Map<Query, Float> freqs = new HashMap<Query, Float>();
final Map<Query, Scorer> subScorers = getSubScorers(scorer);
for (Map.Entry<Query, Scorer> ent : subScorers.entrySet()) {
Scorer value = ent.getValue();
int matchId = value.docID();
@ -109,8 +113,7 @@ public class TestSubScorerFreqs extends LuceneTestCase {
@Override
public void setScorer(Scorer scorer) throws IOException {
super.setScorer(scorer);
subScorers.clear();
setSubScorers(scorer, "TOP");
this.scorer = scorer;
}
};

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@ -32,7 +33,7 @@ class AssertingSubDocsAtOnceCollector extends SimpleCollector {
List<Scorer> allScorers;
@Override
public void setScorer(Scorer s) {
public void setScorer(Scorer s) throws IOException {
// Gathers all scorers, including s and "under":
allScorers = new ArrayList<>();
allScorers.add(s);

View File

@ -78,6 +78,7 @@ public class AssertingScorer extends Scorer {
// collectors (e.g. ToParentBlockJoinCollector) that
// need to walk the scorer tree will miss/skip the
// Scorer we wrap:
assert iterating();
return Collections.singletonList(new ChildScorer(in, "SHOULD"));
}

View File

@ -502,7 +502,7 @@ public class LTRScoringQuery extends Query {
}
@Override
public Collection<ChildScorer> getChildren() {
public Collection<ChildScorer> getChildren() throws IOException {
return featureTraversalScorer.getChildren();
}