mirror of https://github.com/apache/lucene.git
LUCENE-6466: Remove SpanSimilarity class and make SpanMTQWrapper single-pass
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1682513 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6cd18fa645
commit
f066027bc9
|
@ -251,7 +251,7 @@ API Changes
|
|||
(Paul Elschot via Adrien Grand)
|
||||
|
||||
* LUCENE-6466: Moved SpanQuery.getSpans() and .extractTerms() to SpanWeight
|
||||
(Alan Woodward)
|
||||
(Alan Woodward, Robert Muir)
|
||||
|
||||
* LUCENE-6497: Allow subclasses of FieldType to check frozen state
|
||||
(Ryan Ernst)
|
||||
|
|
|
@ -18,6 +18,8 @@ package org.apache.lucene.search.payloads;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
|
@ -28,7 +30,6 @@ import org.apache.lucene.search.spans.SpanCollectorFactory;
|
|||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanScorer;
|
||||
import org.apache.lucene.search.spans.SpanSimilarity;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -40,6 +41,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
|
@ -78,8 +80,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
for (SpanQuery q : clauses) {
|
||||
subWeights.add(q.createWeight(searcher, false, PayloadSpanCollector.FACTORY));
|
||||
}
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights);
|
||||
return new PayloadNearSpanWeight(subWeights, similarity);
|
||||
return new PayloadNearSpanWeight(subWeights, searcher, needsScores ? getTermContexts(subWeights) : null);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -138,18 +139,19 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
|
||||
public class PayloadNearSpanWeight extends SpanNearWeight {
|
||||
|
||||
public PayloadNearSpanWeight(List<SpanWeight> subWeights, SpanSimilarity similarity)
|
||||
public PayloadNearSpanWeight(List<SpanWeight> subWeights, IndexSearcher searcher, Map<Term, TermContext> terms)
|
||||
throws IOException {
|
||||
super(subWeights, similarity, PayloadSpanCollector.FACTORY);
|
||||
super(subWeights, searcher, terms, PayloadSpanCollector.FACTORY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
PayloadSpanCollector collector = (PayloadSpanCollector) collectorFactory.newCollector();
|
||||
Spans spans = super.getSpans(context, acceptDocs, collector);
|
||||
Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context);
|
||||
return (spans == null)
|
||||
? null
|
||||
: new PayloadNearSpanScorer(spans, this, collector, similarity.simScorer(context));
|
||||
: new PayloadNearSpanScorer(spans, this, collector, simScorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -160,7 +162,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
if (newDoc == doc) {
|
||||
float freq = scorer.freq();
|
||||
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
|
||||
SimScorer docScorer = similarity.simScorer(context);
|
||||
SimScorer docScorer = similarity.simScorer(simWeight, context);
|
||||
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
|
||||
Explanation expl = Explanation.match(
|
||||
scoreExplanation.getValue(),
|
||||
|
|
|
@ -30,7 +30,6 @@ import org.apache.lucene.search.spans.BufferedSpanCollector;
|
|||
import org.apache.lucene.search.spans.SpanCollector;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanScorer;
|
||||
import org.apache.lucene.search.spans.SpanSimilarity;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
|
@ -38,6 +37,8 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
|
@ -71,8 +72,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
TermContext context = TermContext.build(searcher.getTopReaderContext(), term);
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, searcher.termStatistics(term, context));
|
||||
return new PayloadTermWeight(context, similarity);
|
||||
return new PayloadTermWeight(context, searcher, needsScores ? Collections.singletonMap(term, context) : null);
|
||||
}
|
||||
|
||||
private static class PayloadTermCollector implements SpanCollector {
|
||||
|
@ -107,18 +107,19 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
|
||||
private class PayloadTermWeight extends SpanTermWeight {
|
||||
|
||||
public PayloadTermWeight(TermContext context, SpanSimilarity similarity)
|
||||
public PayloadTermWeight(TermContext context, IndexSearcher searcher, Map<Term, TermContext> terms)
|
||||
throws IOException {
|
||||
super(context, similarity, PayloadSpanCollector.FACTORY);
|
||||
super(context, searcher, terms, PayloadSpanCollector.FACTORY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
PayloadTermCollector collector = new PayloadTermCollector();
|
||||
Spans spans = super.getSpans(context, acceptDocs, collector);
|
||||
Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context);
|
||||
return (spans == null)
|
||||
? null
|
||||
: new PayloadTermSpanScorer(spans, this, collector, similarity.simScorer(context));
|
||||
: new PayloadTermSpanScorer(spans, this, collector, simScorer);
|
||||
}
|
||||
|
||||
protected class PayloadTermSpanScorer extends SpanScorer {
|
||||
|
@ -208,7 +209,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
if (newDoc == doc) {
|
||||
float freq = scorer.sloppyFreq();
|
||||
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
|
||||
SimScorer docScorer = similarity.simScorer(context);
|
||||
SimScorer docScorer = similarity.simScorer(simWeight, context);
|
||||
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
|
||||
Explanation expl = Explanation.match(
|
||||
scoreExplanation.getValue(),
|
||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
|
@ -54,9 +55,9 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable {
|
|||
final SpanWeight bigWeight;
|
||||
final SpanWeight littleWeight;
|
||||
|
||||
public SpanContainWeight(SpanSimilarity similarity, SpanCollectorFactory factory,
|
||||
public SpanContainWeight(IndexSearcher searcher, Map<Term, TermContext> terms, SpanCollectorFactory factory,
|
||||
SpanWeight bigWeight, SpanWeight littleWeight) throws IOException {
|
||||
super(SpanContainQuery.this, similarity, factory);
|
||||
super(SpanContainQuery.this, searcher, terms, factory);
|
||||
this.bigWeight = bigWeight;
|
||||
this.littleWeight = littleWeight;
|
||||
}
|
||||
|
|
|
@ -18,11 +18,14 @@ package org.apache.lucene.search.spans;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
/** Keep matches that contain another Spans. */
|
||||
public class SpanContainingQuery extends SpanContainQuery {
|
||||
|
@ -51,15 +54,15 @@ public class SpanContainingQuery extends SpanContainQuery {
|
|||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
|
||||
SpanWeight bigWeight = big.createWeight(searcher, false, factory);
|
||||
SpanWeight littleWeight = little.createWeight(searcher, false, factory);
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, bigWeight, littleWeight);
|
||||
return new SpanContainingWeight(similarity, factory, bigWeight, littleWeight);
|
||||
return new SpanContainingWeight(searcher, needsScores ? getTermContexts(bigWeight, littleWeight) : null,
|
||||
factory, bigWeight, littleWeight);
|
||||
}
|
||||
|
||||
public class SpanContainingWeight extends SpanContainWeight {
|
||||
|
||||
public SpanContainingWeight(SpanSimilarity similarity, SpanCollectorFactory factory,
|
||||
public SpanContainingWeight(IndexSearcher searcher, Map<Term, TermContext> terms, SpanCollectorFactory factory,
|
||||
SpanWeight bigWeight, SpanWeight littleWeight) throws IOException {
|
||||
super(similarity, factory, bigWeight, littleWeight);
|
||||
super(searcher, terms, factory, bigWeight, littleWeight);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -174,10 +174,7 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
|
|||
|
||||
@Override
|
||||
protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) {
|
||||
// TODO: would be nice to not lose term-state here.
|
||||
// we could add a hack option to SpanOrQuery, but the hack would only work if this is the top-level Span
|
||||
// (if you put this thing in another span query, it would extractTerms/double-seek anyway)
|
||||
final SpanTermQuery q = new SpanTermQuery(term);
|
||||
final SpanTermQuery q = new SpanTermQuery(term, states);
|
||||
q.setBoost(boost);
|
||||
topLevel.addClause(q);
|
||||
}
|
||||
|
@ -221,7 +218,7 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
|
|||
|
||||
@Override
|
||||
protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, TermContext states) {
|
||||
final SpanTermQuery q = new SpanTermQuery(term);
|
||||
final SpanTermQuery q = new SpanTermQuery(term, states);
|
||||
q.setBoost(boost);
|
||||
topLevel.addClause(q);
|
||||
}
|
||||
|
|
|
@ -117,16 +117,15 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
for (SpanQuery q : clauses) {
|
||||
subWeights.add(q.createWeight(searcher, false, factory));
|
||||
}
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights);
|
||||
return new SpanNearWeight(subWeights, similarity, factory);
|
||||
return new SpanNearWeight(subWeights, searcher, needsScores ? getTermContexts(subWeights) : null, factory);
|
||||
}
|
||||
|
||||
public class SpanNearWeight extends SpanWeight {
|
||||
|
||||
final List<SpanWeight> subWeights;
|
||||
|
||||
public SpanNearWeight(List<SpanWeight> subWeights, SpanSimilarity similarity, SpanCollectorFactory factory) throws IOException {
|
||||
super(SpanNearQuery.this, similarity, factory);
|
||||
public SpanNearWeight(List<SpanWeight> subWeights, IndexSearcher searcher, Map<Term, TermContext> terms, SpanCollectorFactory factory) throws IOException {
|
||||
super(SpanNearQuery.this, searcher, terms, factory);
|
||||
this.subWeights = subWeights;
|
||||
}
|
||||
|
||||
|
|
|
@ -106,8 +106,8 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
|
||||
SpanWeight includeWeight = include.createWeight(searcher, false, factory);
|
||||
SpanWeight excludeWeight = exclude.createWeight(searcher, false, factory);
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, includeWeight);
|
||||
return new SpanNotWeight(similarity, factory, includeWeight, excludeWeight);
|
||||
return new SpanNotWeight(searcher, needsScores ? getTermContexts(includeWeight, excludeWeight) : null,
|
||||
factory, includeWeight, excludeWeight);
|
||||
}
|
||||
|
||||
public class SpanNotWeight extends SpanWeight {
|
||||
|
@ -115,9 +115,9 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
final SpanWeight includeWeight;
|
||||
final SpanWeight excludeWeight;
|
||||
|
||||
public SpanNotWeight(SpanSimilarity similarity, SpanCollectorFactory factory,
|
||||
public SpanNotWeight(IndexSearcher searcher, Map<Term, TermContext> terms, SpanCollectorFactory factory,
|
||||
SpanWeight includeWeight, SpanWeight excludeWeight) throws IOException {
|
||||
super(SpanNotQuery.this, similarity, factory);
|
||||
super(SpanNotQuery.this, searcher, terms, factory);
|
||||
this.includeWeight = includeWeight;
|
||||
this.excludeWeight = excludeWeight;
|
||||
}
|
||||
|
|
|
@ -143,16 +143,15 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
for (SpanQuery q : clauses) {
|
||||
subWeights.add(q.createWeight(searcher, false, factory));
|
||||
}
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights);
|
||||
return new SpanOrWeight(similarity, factory, subWeights);
|
||||
return new SpanOrWeight(searcher, needsScores ? getTermContexts(subWeights) : null, factory, subWeights);
|
||||
}
|
||||
|
||||
public class SpanOrWeight extends SpanWeight {
|
||||
|
||||
final List<SpanWeight> subWeights;
|
||||
|
||||
public SpanOrWeight(SpanSimilarity similarity, SpanCollectorFactory factory, List<SpanWeight> subWeights) throws IOException {
|
||||
super(SpanOrQuery.this, similarity, factory);
|
||||
public SpanOrWeight(IndexSearcher searcher, Map<Term, TermContext> terms, SpanCollectorFactory factory, List<SpanWeight> subWeights) throws IOException {
|
||||
super(SpanOrQuery.this, searcher, terms, factory);
|
||||
this.subWeights = subWeights;
|
||||
}
|
||||
|
||||
|
|
|
@ -71,22 +71,21 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
|
||||
SpanWeight matchWeight = match.createWeight(searcher, false, factory);
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, matchWeight);
|
||||
return new SpanPositionCheckWeight(matchWeight, similarity, factory);
|
||||
return new SpanPositionCheckWeight(matchWeight, searcher, needsScores ? getTermContexts(matchWeight) : null, factory);
|
||||
}
|
||||
|
||||
public class SpanPositionCheckWeight extends SpanWeight {
|
||||
|
||||
final SpanWeight matchWeight;
|
||||
|
||||
public SpanPositionCheckWeight(SpanWeight matchWeight, SpanSimilarity similarity,
|
||||
public SpanPositionCheckWeight(SpanWeight matchWeight, IndexSearcher searcher, Map<Term, TermContext> terms,
|
||||
SpanCollectorFactory collectorFactory) throws IOException {
|
||||
super(SpanPositionCheckQuery.this, similarity, collectorFactory);
|
||||
super(SpanPositionCheckQuery.this, searcher, terms, collectorFactory);
|
||||
this.matchWeight = matchWeight;
|
||||
}
|
||||
|
||||
public SpanPositionCheckWeight(SpanWeight matchWeight, SpanSimilarity similarity) throws IOException {
|
||||
this(matchWeight, similarity, SpanCollectorFactory.NO_OP_FACTORY);
|
||||
public SpanPositionCheckWeight(SpanWeight matchWeight, IndexSearcher searcher, Map<Term, TermContext> terms) throws IOException {
|
||||
this(matchWeight, searcher, terms, SpanCollectorFactory.NO_OP_FACTORY);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,11 +17,16 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Weight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/** Base class for span-based queries. */
|
||||
public abstract class SpanQuery extends Query {
|
||||
|
@ -46,4 +51,28 @@ public abstract class SpanQuery extends Query {
|
|||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return createWeight(searcher, needsScores, SpanCollectorFactory.NO_OP_FACTORY);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a map of terms to termcontexts, for use in constructing SpanWeights
|
||||
* @lucene.internal
|
||||
*/
|
||||
protected static Map<Term, TermContext> getTermContexts(SpanWeight... weights) {
|
||||
Map<Term, TermContext> terms = new TreeMap<>();
|
||||
for (SpanWeight w : weights) {
|
||||
w.extractTermContexts(terms);
|
||||
}
|
||||
return terms;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a map of terms to termcontexts, for use in constructing SpanWeights
|
||||
* @lucene.internal
|
||||
*/
|
||||
protected static Map<Term, TermContext> getTermContexts(Collection<SpanWeight> weights) {
|
||||
Map<Term, TermContext> terms = new TreeMap<>();
|
||||
for (SpanWeight w : weights) {
|
||||
w.extractTermContexts(terms);
|
||||
}
|
||||
return terms;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,202 +0,0 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Encapsulates similarity statistics required for SpanScorers
|
||||
*/
|
||||
public abstract class SpanSimilarity {
|
||||
|
||||
/**
|
||||
* The field term statistics are taken from
|
||||
*/
|
||||
protected final String field;
|
||||
|
||||
/**
|
||||
* Create a new SpanSimilarity
|
||||
* @param field the similarity field for term statistics
|
||||
*/
|
||||
protected SpanSimilarity(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a SimScorer for this SpanSimilarity's statistics
|
||||
* @param context the LeafReaderContext to calculate the scorer for
|
||||
* @return a SimScorer, or null if no scoring is required
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public abstract Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* @return the field for term statistics
|
||||
*/
|
||||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
/**
|
||||
* See {@link org.apache.lucene.search.Weight#getValueForNormalization()}
|
||||
*
|
||||
* @return the value for normalization
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public abstract float getValueForNormalization() throws IOException;
|
||||
|
||||
/**
|
||||
* See {@link org.apache.lucene.search.Weight#normalize(float,float)}
|
||||
*
|
||||
* @param queryNorm the query norm
|
||||
* @param topLevelBoost the top level boost
|
||||
*/
|
||||
public abstract void normalize(float queryNorm, float topLevelBoost);
|
||||
|
||||
/**
|
||||
* A SpanSimilarity class that calculates similarity statistics based on the term statistics
|
||||
* of a set of terms.
|
||||
*/
|
||||
public static class ScoringSimilarity extends SpanSimilarity {
|
||||
|
||||
private final Similarity similarity;
|
||||
private final Similarity.SimWeight stats;
|
||||
|
||||
private ScoringSimilarity(SpanQuery query, IndexSearcher searcher, TermStatistics... termStats) throws IOException {
|
||||
super(query.getField());
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.stats = similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(field), termStats);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException {
|
||||
return similarity.simScorer(stats, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getValueForNormalization() throws IOException {
|
||||
return stats.getValueForNormalization();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void normalize(float queryNorm, float topLevelBoost) {
|
||||
stats.normalize(queryNorm, topLevelBoost);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* A SpanSimilarity class that does no scoring
|
||||
*/
|
||||
public static class NonScoringSimilarity extends SpanSimilarity {
|
||||
|
||||
private NonScoringSimilarity(String field) {
|
||||
super(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getValueForNormalization() throws IOException {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void normalize(float queryNorm, float topLevelBoost) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a SpanSimilarity
|
||||
* @param query the SpanQuery to be run
|
||||
* @param searcher the searcher
|
||||
* @param needsScores whether or not scores are required
|
||||
* @param stats an array of TermStatistics to use in creating the similarity
|
||||
* @return a SpanSimilarity, or null if there are no statistics to use
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher,
|
||||
boolean needsScores, TermStatistics... stats) throws IOException {
|
||||
return needsScores ? new ScoringSimilarity(query, searcher, stats) : new NonScoringSimilarity(query.getField());
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a SpanSimilarity
|
||||
* @param query the SpanQuery to be run
|
||||
* @param searcher the searcher
|
||||
* @param needsScores whether or not scores are required
|
||||
* @param weights a set of {@link org.apache.lucene.search.spans.SpanWeight}s to extract terms from
|
||||
* @return a SpanSimilarity, or null if there are no statistics to use
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, boolean needsScores, List<SpanWeight> weights) throws IOException {
|
||||
return build(query, searcher, needsScores, weights.toArray(new SpanWeight[weights.size()]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a SpanSimilarity
|
||||
* @param query the SpanQuery to run
|
||||
* @param searcher the searcher
|
||||
* @param needsScores whether or not scores are required
|
||||
* @param weights an array of {@link org.apache.lucene.search.spans.SpanWeight}s to extract terms from
|
||||
* @return a SpanSimilarity, or null if there are no statistics to use
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, boolean needsScores, SpanWeight... weights) throws IOException {
|
||||
|
||||
if (!needsScores)
|
||||
return new NonScoringSimilarity(query.getField());
|
||||
|
||||
Map<Term, TermContext> contexts = new HashMap<>();
|
||||
for (SpanWeight w : weights) {
|
||||
w.extractTermContexts(contexts);
|
||||
}
|
||||
|
||||
if (contexts.size() == 0)
|
||||
return null;
|
||||
|
||||
TermStatistics[] stats = new TermStatistics[contexts.size()];
|
||||
int i = 0;
|
||||
for (Term term : contexts.keySet()) {
|
||||
stats[i] = searcher.termStatistics(term, contexts.get(term));
|
||||
i++;
|
||||
}
|
||||
|
||||
return new ScoringSimilarity(query, searcher, stats);
|
||||
}
|
||||
|
||||
}
|
|
@ -17,8 +17,10 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.TermState;
|
||||
|
@ -29,6 +31,7 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
@ -37,11 +40,23 @@ import java.util.Set;
|
|||
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
|
||||
*/
|
||||
public class SpanTermQuery extends SpanQuery {
|
||||
protected Term term;
|
||||
|
||||
protected final Term term;
|
||||
protected final TermContext termContext;
|
||||
|
||||
/**
 * Construct a SpanTermQuery matching the named term's spans.
 * No TermContext is attached to the query.
 *
 * @param term the term to match; must not be null
 */
public SpanTermQuery(Term term) {
  this(term, null);
}
|
||||
|
||||
/**
 * Expert: Construct a SpanTermQuery matching the named term's spans, using
 * the provided TermContext
 *
 * @param term the term to match; a NullPointerException is thrown if it is null
 * @param context the TermContext to store alongside the term; stored as given,
 *                without a null check
 */
public SpanTermQuery(Term term, TermContext context) {
  Objects.requireNonNull(term);
  this.term = term;
  this.termContext = context;
}
|
||||
|
||||
/** Return the term whose spans are matched. */
|
||||
|
@ -52,18 +67,25 @@ public class SpanTermQuery extends SpanQuery {
|
|||
|
||||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
|
||||
TermContext context = TermContext.build(searcher.getTopReaderContext(), term);
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, searcher.termStatistics(term, context));
|
||||
return new SpanTermWeight(context, similarity, factory);
|
||||
final TermContext context;
|
||||
final IndexReaderContext topContext = searcher.getTopReaderContext();
|
||||
if (termContext == null || termContext.topReaderContext != topContext) {
|
||||
context = TermContext.build(topContext, term);
|
||||
}
|
||||
else {
|
||||
context = termContext;
|
||||
}
|
||||
return new SpanTermWeight(context, searcher, needsScores ? Collections.singletonMap(term, context) : null, factory);
|
||||
}
|
||||
|
||||
public class SpanTermWeight extends SpanWeight {
|
||||
|
||||
final TermContext termContext;
|
||||
|
||||
public SpanTermWeight(TermContext termContext, SpanSimilarity similarity, SpanCollectorFactory factory) throws IOException {
|
||||
super(SpanTermQuery.this, similarity, factory);
|
||||
public SpanTermWeight(TermContext termContext, IndexSearcher searcher, Map<Term, TermContext> terms, SpanCollectorFactory factory) throws IOException {
|
||||
super(SpanTermQuery.this, searcher, terms, factory);
|
||||
this.termContext = termContext;
|
||||
assert termContext != null : "TermContext must not be null";
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -79,8 +101,11 @@ public class SpanTermQuery extends SpanQuery {
|
|||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, SpanCollector collector) throws IOException {
|
||||
|
||||
assert termContext.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termContext.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
|
||||
|
||||
final TermState state = termContext.get(context.ord);
|
||||
if (state == null) { // term is not present in that reader
|
||||
assert context.reader().docFreq(term) == 0 : "no termstate found but term exists in reader term=" + term;
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,9 +21,13 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
|
@ -35,20 +39,39 @@ import java.util.Map;
|
|||
*/
|
||||
public abstract class SpanWeight extends Weight {
|
||||
|
||||
protected final SpanSimilarity similarity;
|
||||
protected final Similarity similarity;
|
||||
protected final Similarity.SimWeight simWeight;
|
||||
protected final SpanCollectorFactory collectorFactory;
|
||||
protected final String field;
|
||||
|
||||
/**
|
||||
* Create a new SpanWeight
|
||||
* @param query the parent query
|
||||
* @param similarity a SpanSimilarity to be used for scoring
|
||||
* @param searcher the IndexSearcher to query against
|
||||
* @param termContexts a map of terms to termcontexts for use in building the similarity. May
|
||||
* be null if scores are not required
|
||||
* @param collectorFactory a SpanCollectorFactory to be used for Span collection
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public SpanWeight(SpanQuery query, SpanSimilarity similarity, SpanCollectorFactory collectorFactory) throws IOException {
|
||||
public SpanWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermContext> termContexts, SpanCollectorFactory collectorFactory) throws IOException {
|
||||
super(query);
|
||||
this.similarity = similarity;
|
||||
this.field = query.getField();
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.collectorFactory = collectorFactory;
|
||||
this.simWeight = buildSimWeight(query, searcher, termContexts);
|
||||
}
|
||||
|
||||
private Similarity.SimWeight buildSimWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermContext> termContexts) throws IOException {
|
||||
if (termContexts == null || termContexts.size() == 0 || query.getField() == null)
|
||||
return null;
|
||||
TermStatistics[] termStats = new TermStatistics[termContexts.size()];
|
||||
int i = 0;
|
||||
for (Term term : termContexts.keySet()) {
|
||||
termStats[i] = searcher.termStatistics(term, termContexts.get(term));
|
||||
i++;
|
||||
}
|
||||
CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
|
||||
return searcher.getSimilarity().computeWeight(query.getBoost(), collectionStats, termStats);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -81,27 +104,28 @@ public abstract class SpanWeight extends Weight {
|
|||
|
||||
@Override
|
||||
public float getValueForNormalization() throws IOException {
|
||||
return similarity == null ? 1.0f : similarity.getValueForNormalization();
|
||||
return simWeight == null ? 1.0f : simWeight.getValueForNormalization();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void normalize(float queryNorm, float topLevelBoost) {
|
||||
if (similarity != null) {
|
||||
similarity.normalize(queryNorm, topLevelBoost);
|
||||
if (simWeight != null) {
|
||||
simWeight.normalize(queryNorm, topLevelBoost);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
if (similarity == null) {
|
||||
if (field == null) {
|
||||
return null;
|
||||
}
|
||||
Terms terms = context.reader().terms(similarity.getField());
|
||||
Terms terms = context.reader().terms(field);
|
||||
if (terms != null && terms.hasPositions() == false) {
|
||||
throw new IllegalStateException("field \"" + similarity.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
|
||||
throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
|
||||
}
|
||||
Spans spans = getSpans(context, acceptDocs, collectorFactory.newCollector());
|
||||
return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(context));
|
||||
Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context);
|
||||
return (spans == null) ? null : new SpanScorer(spans, this, simScorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -111,7 +135,7 @@ public abstract class SpanWeight extends Weight {
|
|||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = scorer.sloppyFreq();
|
||||
SimScorer docScorer = similarity.simScorer(context);
|
||||
SimScorer docScorer = similarity.simScorer(simWeight, context);
|
||||
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
|
||||
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
|
||||
return Explanation.match(scoreExplanation.getValue(),
|
||||
|
|
|
@ -18,11 +18,14 @@ package org.apache.lucene.search.spans;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
/** Keep matches that are contained within another Spans. */
|
||||
public class SpanWithinQuery extends SpanContainQuery {
|
||||
|
@ -52,15 +55,15 @@ public class SpanWithinQuery extends SpanContainQuery {
|
|||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
|
||||
SpanWeight bigWeight = big.createWeight(searcher, false, factory);
|
||||
SpanWeight littleWeight = little.createWeight(searcher, false, factory);
|
||||
SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, bigWeight, littleWeight);
|
||||
return new SpanWithinWeight(similarity, factory, bigWeight, littleWeight);
|
||||
return new SpanWithinWeight(searcher, needsScores ? getTermContexts(bigWeight, littleWeight) : null,
|
||||
factory, bigWeight, littleWeight);
|
||||
}
|
||||
|
||||
public class SpanWithinWeight extends SpanContainWeight {
|
||||
|
||||
public SpanWithinWeight(SpanSimilarity similarity, SpanCollectorFactory factory,
|
||||
public SpanWithinWeight(IndexSearcher searcher, Map<Term, TermContext> terms, SpanCollectorFactory factory,
|
||||
SpanWeight bigWeight, SpanWeight littleWeight) throws IOException {
|
||||
super(similarity, factory, bigWeight, littleWeight);
|
||||
super(searcher, terms, factory, bigWeight, littleWeight);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -44,7 +44,7 @@ public class AssertingSpanQuery extends SpanQuery {
|
|||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
|
||||
SpanWeight weight = in.createWeight(searcher, needsScores, factory);
|
||||
return new AssertingSpanWeight(weight);
|
||||
return new AssertingSpanWeight(searcher, weight);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,6 +20,9 @@ package org.apache.lucene.search.spans;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -38,8 +41,8 @@ public class AssertingSpanWeight extends SpanWeight {
|
|||
* @param in the SpanWeight to wrap
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public AssertingSpanWeight(SpanWeight in) throws IOException {
|
||||
super((SpanQuery) in.getQuery(), in.similarity, in.collectorFactory);
|
||||
public AssertingSpanWeight(IndexSearcher searcher, SpanWeight in) throws IOException {
|
||||
super((SpanQuery) in.getQuery(), searcher, null, in.collectorFactory);
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
|
@ -60,4 +63,24 @@ public class AssertingSpanWeight extends SpanWeight {
|
|||
public void extractTerms(Set<Term> terms) {
|
||||
in.extractTerms(terms);
|
||||
}
|
||||
|
||||
/**
 * Forwards to the wrapped weight {@code in} and returns its normalization value.
 *
 * <p>NOTE(review): this wrapper's constructor appears to hand {@code null} term
 * contexts to the superclass, so the inherited implementation would not reflect
 * the wrapped weight's statistics; delegating keeps the wrapper transparent.
 *
 * @throws IOException if the wrapped weight throws it
 */
@Override
|
||||
public float getValueForNormalization() throws IOException {
|
||||
return in.getValueForNormalization();
|
||||
}
|
||||
|
||||
/**
 * Forwards normalization to the wrapped weight {@code in}; this wrapper keeps no
 * normalization state of its own in this method.
 *
 * @param queryNorm the query norm, passed through unchanged
 * @param topLevelBoost the top-level boost, passed through unchanged
 */
@Override
|
||||
public void normalize(float queryNorm, float topLevelBoost) {
|
||||
in.normalize(queryNorm, topLevelBoost);
|
||||
}
|
||||
|
||||
/**
 * Returns whatever scorer the wrapped weight {@code in} produces for the given
 * leaf; both arguments are passed through unchanged.
 *
 * @param context the leaf reader context to score against
 * @param acceptDocs the accepted-documents bits handed to the wrapped weight
 * @return the result of {@code in.scorer(context, acceptDocs)}
 * @throws IOException if the wrapped weight throws it
 */
@Override
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
return in.scorer(context, acceptDocs);
|
||||
}
|
||||
|
||||
/**
 * Returns the explanation produced by the wrapped weight {@code in} for the
 * given document; both arguments are passed through unchanged.
 *
 * @param context the leaf reader context containing the document
 * @param doc the document id handed to the wrapped weight
 * @return the result of {@code in.explain(context, doc)}
 * @throws IOException if the wrapped weight throws it
 */
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
return in.explain(context, doc);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue