From f066027bc9634fe1ec64960a0aa9f0d728430c82 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 29 May 2015 16:23:08 +0000 Subject: [PATCH] LUCENE-6466: Remove SpanSimilarity class and make SpanMTQWrapper single-pass git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1682513 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 2 +- .../search/payloads/PayloadNearQuery.java | 16 +- .../search/payloads/PayloadTermQuery.java | 15 +- .../lucene/search/spans/SpanContainQuery.java | 5 +- .../search/spans/SpanContainingQuery.java | 11 +- .../spans/SpanMultiTermQueryWrapper.java | 7 +- .../lucene/search/spans/SpanNearQuery.java | 7 +- .../lucene/search/spans/SpanNotQuery.java | 8 +- .../lucene/search/spans/SpanOrQuery.java | 7 +- .../search/spans/SpanPositionCheckQuery.java | 11 +- .../apache/lucene/search/spans/SpanQuery.java | 29 +++ .../lucene/search/spans/SpanSimilarity.java | 202 ------------------ .../lucene/search/spans/SpanTermQuery.java | 37 +++- .../lucene/search/spans/SpanWeight.java | 48 +++-- .../lucene/search/spans/SpanWithinQuery.java | 11 +- .../search/spans/AssertingSpanQuery.java | 2 +- .../search/spans/AssertingSpanWeight.java | 27 ++- 17 files changed, 174 insertions(+), 271 deletions(-) delete mode 100644 lucene/core/src/java/org/apache/lucene/search/spans/SpanSimilarity.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 992f00fcc25..fff46e634ce 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -251,7 +251,7 @@ API Changes (Paul Elschot via Adrien Grand) * LUCENE-6466: Moved SpanQuery.getSpans() and .extractTerms() to SpanWeight - (Alan Woodward) + (Alan Woodward, Robert Muir) * LUCENE-6497: Allow subclasses of FieldType to check frozen state (Ryan Ernst) diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index 7ce87ccbde4..9fddd202373 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -18,6 +18,8 @@ package org.apache.lucene.search.payloads; */ import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; @@ -28,7 +30,6 @@ import org.apache.lucene.search.spans.SpanCollectorFactory; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanScorer; -import org.apache.lucene.search.spans.SpanSimilarity; import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.Bits; @@ -40,6 +41,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; /** @@ -78,8 +80,7 @@ public class PayloadNearQuery extends SpanNearQuery { for (SpanQuery q : clauses) { subWeights.add(q.createWeight(searcher, false, PayloadSpanCollector.FACTORY)); } - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights); - return new PayloadNearSpanWeight(subWeights, similarity); + return new PayloadNearSpanWeight(subWeights, searcher, needsScores ? getTermContexts(subWeights) : null); } @Override @@ -138,18 +139,19 @@ public class PayloadNearQuery extends SpanNearQuery { public class PayloadNearSpanWeight extends SpanNearWeight { - public PayloadNearSpanWeight(List subWeights, SpanSimilarity similarity) + public PayloadNearSpanWeight(List subWeights, IndexSearcher searcher, Map terms) throws IOException { - super(subWeights, similarity, PayloadSpanCollector.FACTORY); + super(subWeights, searcher, terms, PayloadSpanCollector.FACTORY); } @Override public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { PayloadSpanCollector collector = (PayloadSpanCollector) collectorFactory.newCollector(); Spans spans = super.getSpans(context, acceptDocs, collector); + Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context); return (spans == null) ? null - : new PayloadNearSpanScorer(spans, this, collector, similarity.simScorer(context)); + : new PayloadNearSpanScorer(spans, this, collector, simScorer); } @Override @@ -160,7 +162,7 @@ public class PayloadNearQuery extends SpanNearQuery { if (newDoc == doc) { float freq = scorer.freq(); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); - SimScorer docScorer = similarity.simScorer(context); + SimScorer docScorer = similarity.simScorer(simWeight, context); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); Explanation expl = Explanation.match( scoreExplanation.getValue(), diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index 6a0420d1115..55f2c81c388 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -30,7 +30,6 @@ import org.apache.lucene.search.spans.BufferedSpanCollector; import org.apache.lucene.search.spans.SpanCollector; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanScorer; -import org.apache.lucene.search.spans.SpanSimilarity; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.Spans; @@ -38,6 +37,8 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import java.io.IOException; +import java.util.Collections; +import java.util.Map; import java.util.Objects; /** @@ -71,8 +72,7 @@ public class PayloadTermQuery extends SpanTermQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { TermContext context = TermContext.build(searcher.getTopReaderContext(), term); - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, searcher.termStatistics(term, context)); - return new PayloadTermWeight(context, similarity); + return new PayloadTermWeight(context, searcher, needsScores ? Collections.singletonMap(term, context) : null); } private static class PayloadTermCollector implements SpanCollector { @@ -107,18 +107,19 @@ public class PayloadTermQuery extends SpanTermQuery { private class PayloadTermWeight extends SpanTermWeight { - public PayloadTermWeight(TermContext context, SpanSimilarity similarity) + public PayloadTermWeight(TermContext context, IndexSearcher searcher, Map terms) throws IOException { - super(context, similarity, PayloadSpanCollector.FACTORY); + super(context, searcher, terms, PayloadSpanCollector.FACTORY); } @Override public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { PayloadTermCollector collector = new PayloadTermCollector(); Spans spans = super.getSpans(context, acceptDocs, collector); + Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context); return (spans == null) ? null - : new PayloadTermSpanScorer(spans, this, collector, similarity.simScorer(context)); + : new PayloadTermSpanScorer(spans, this, collector, simScorer); } protected class PayloadTermSpanScorer extends SpanScorer { @@ -208,7 +209,7 @@ public class PayloadTermQuery extends SpanTermQuery { if (newDoc == doc) { float freq = scorer.sloppyFreq(); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); - SimScorer docScorer = similarity.simScorer(context); + SimScorer docScorer = similarity.simScorer(simWeight, context); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); Explanation expl = Explanation.match( scoreExplanation.getValue(), diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java index 198d7fef89d..40066a3ef82 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; @@ -54,9 +55,9 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable { final SpanWeight bigWeight; final SpanWeight littleWeight; - public SpanContainWeight(SpanSimilarity similarity, SpanCollectorFactory factory, + public SpanContainWeight(IndexSearcher searcher, Map terms, SpanCollectorFactory factory, SpanWeight bigWeight, SpanWeight littleWeight) throws IOException { - super(SpanContainQuery.this, similarity, factory); + super(SpanContainQuery.this, searcher, terms, factory); this.bigWeight = bigWeight; this.littleWeight = littleWeight; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index 88c304a9bc2..ab97ddadf8a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -18,11 +18,14 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; import java.io.IOException; import java.util.ArrayList; +import java.util.Map; /** Keep matches that contain another Spans. */ public class SpanContainingQuery extends SpanContainQuery { @@ -51,15 +54,15 @@ public class SpanContainingQuery extends SpanContainQuery { public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { SpanWeight bigWeight = big.createWeight(searcher, false, factory); SpanWeight littleWeight = little.createWeight(searcher, false, factory); - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, bigWeight, littleWeight); - return new SpanContainingWeight(similarity, factory, bigWeight, littleWeight); + return new SpanContainingWeight(searcher, needsScores ? getTermContexts(bigWeight, littleWeight) : null, + factory, bigWeight, littleWeight); } public class SpanContainingWeight extends SpanContainWeight { - public SpanContainingWeight(SpanSimilarity similarity, SpanCollectorFactory factory, + public SpanContainingWeight(IndexSearcher searcher, Map terms, SpanCollectorFactory factory, SpanWeight bigWeight, SpanWeight littleWeight) throws IOException { - super(similarity, factory, bigWeight, littleWeight); + super(searcher, terms, factory, bigWeight, littleWeight); } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index b7011eac3b2..8799e96efc4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -174,10 +174,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue @Override protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) { - // TODO: would be nice to not lose term-state here. - // we could add a hack option to SpanOrQuery, but the hack would only work if this is the top-level Span - // (if you put this thing in another span query, it would extractTerms/double-seek anyway) - final SpanTermQuery q = new SpanTermQuery(term); + final SpanTermQuery q = new SpanTermQuery(term, states); q.setBoost(boost); topLevel.addClause(q); } @@ -221,7 +218,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue @Override protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, TermContext states) { - final SpanTermQuery q = new SpanTermQuery(term); + final SpanTermQuery q = new SpanTermQuery(term, states); q.setBoost(boost); topLevel.addClause(q); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index a0c431b7ebe..742abffd546 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -117,16 +117,15 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { for (SpanQuery q : clauses) { subWeights.add(q.createWeight(searcher, false, factory)); } - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights); - return new SpanNearWeight(subWeights, similarity, factory); + return new SpanNearWeight(subWeights, searcher, needsScores ? getTermContexts(subWeights) : null, factory); } public class SpanNearWeight extends SpanWeight { final List subWeights; - public SpanNearWeight(List subWeights, SpanSimilarity similarity, SpanCollectorFactory factory) throws IOException { - super(SpanNearQuery.this, similarity, factory); + public SpanNearWeight(List subWeights, IndexSearcher searcher, Map terms, SpanCollectorFactory factory) throws IOException { + super(SpanNearQuery.this, searcher, terms, factory); this.subWeights = subWeights; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 73eea4384f5..578dae414ce 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -106,8 +106,8 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { SpanWeight includeWeight = include.createWeight(searcher, false, factory); SpanWeight excludeWeight = exclude.createWeight(searcher, false, factory); - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, includeWeight); - return new SpanNotWeight(similarity, factory, includeWeight, excludeWeight); + return new SpanNotWeight(searcher, needsScores ? getTermContexts(includeWeight, excludeWeight) : null, + factory, includeWeight, excludeWeight); } public class SpanNotWeight extends SpanWeight { @@ -115,9 +115,9 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { final SpanWeight includeWeight; final SpanWeight excludeWeight; - public SpanNotWeight(SpanSimilarity similarity, SpanCollectorFactory factory, + public SpanNotWeight(IndexSearcher searcher, Map terms, SpanCollectorFactory factory, SpanWeight includeWeight, SpanWeight excludeWeight) throws IOException { - super(SpanNotQuery.this, similarity, factory); + super(SpanNotQuery.this, searcher, terms, factory); this.includeWeight = includeWeight; this.excludeWeight = excludeWeight; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 85a890996d7..2c4e25522e8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -143,16 +143,15 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { for (SpanQuery q : clauses) { subWeights.add(q.createWeight(searcher, false, factory)); } - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights); - return new SpanOrWeight(similarity, factory, subWeights); + return new SpanOrWeight(searcher, needsScores ? getTermContexts(subWeights) : null, factory, subWeights); } public class SpanOrWeight extends SpanWeight { final List subWeights; - public SpanOrWeight(SpanSimilarity similarity, SpanCollectorFactory factory, List subWeights) throws IOException { - super(SpanOrQuery.this, similarity, factory); + public SpanOrWeight(IndexSearcher searcher, Map terms, SpanCollectorFactory factory, List subWeights) throws IOException { + super(SpanOrQuery.this, searcher, terms, factory); this.subWeights = subWeights; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index a848bb38254..f125581d919 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -71,22 +71,21 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { SpanWeight matchWeight = match.createWeight(searcher, false, factory); - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, matchWeight); - return new SpanPositionCheckWeight(matchWeight, similarity, factory); + return new SpanPositionCheckWeight(matchWeight, searcher, needsScores ? getTermContexts(matchWeight) : null, factory); } public class SpanPositionCheckWeight extends SpanWeight { final SpanWeight matchWeight; - public SpanPositionCheckWeight(SpanWeight matchWeight, SpanSimilarity similarity, + public SpanPositionCheckWeight(SpanWeight matchWeight, IndexSearcher searcher, Map terms, SpanCollectorFactory collectorFactory) throws IOException { - super(SpanPositionCheckQuery.this, similarity, collectorFactory); + super(SpanPositionCheckQuery.this, searcher, terms, collectorFactory); this.matchWeight = matchWeight; } - public SpanPositionCheckWeight(SpanWeight matchWeight, SpanSimilarity similarity) throws IOException { - this(matchWeight, similarity, SpanCollectorFactory.NO_OP_FACTORY); + public SpanPositionCheckWeight(SpanWeight matchWeight, IndexSearcher searcher, Map terms) throws IOException { + this(matchWeight, searcher, terms, SpanCollectorFactory.NO_OP_FACTORY); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index fea6d98696a..6460b3855b0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -17,11 +17,16 @@ package org.apache.lucene.search.spans; * limitations under the License. */ +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; import java.io.IOException; +import java.util.Collection; +import java.util.Map; +import java.util.TreeMap; /** Base class for span-based queries. */ public abstract class SpanQuery extends Query { @@ -46,4 +51,28 @@ public abstract class SpanQuery extends Query { public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { return createWeight(searcher, needsScores, SpanCollectorFactory.NO_OP_FACTORY); } + + /** + * Build a map of terms to termcontexts, for use in constructing SpanWeights + * @lucene.internal + */ + protected static Map getTermContexts(SpanWeight... weights) { + Map terms = new TreeMap<>(); + for (SpanWeight w : weights) { + w.extractTermContexts(terms); + } + return terms; + } + + /** + * Build a map of terms to termcontexts, for use in constructing SpanWeights + * @lucene.internal + */ + protected static Map getTermContexts(Collection weights) { + Map terms = new TreeMap<>(); + for (SpanWeight w : weights) { + w.extractTermContexts(terms); + } + return terms; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanSimilarity.java deleted file mode 100644 index 517f7aba34f..00000000000 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanSimilarity.java +++ /dev/null @@ -1,202 +0,0 @@ -package org.apache.lucene.search.spans; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.TermStatistics; -import org.apache.lucene.search.similarities.Similarity; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Encapsulates similarity statistics required for SpanScorers - */ -public abstract class SpanSimilarity { - - /** - * The field term statistics are taken from - */ - protected final String field; - - /** - * Create a new SpanSimilarity - * @param field the similarity field for term statistics - */ - protected SpanSimilarity(String field) { - this.field = field; - } - - /** - * Create a SimScorer for this SpanSimilarity's statistics - * @param context the LeafReaderContext to calculate the scorer for - * @return a SimScorer, or null if no scoring is required - * @throws IOException on error - */ - public abstract Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException; - - /** - * @return the field for term statistics - */ - public String getField() { - return field; - } - - /** - * See {@link org.apache.lucene.search.Weight#getValueForNormalization()} - * - * @return the value for normalization - * @throws IOException on error - */ - public abstract float getValueForNormalization() throws IOException; - - /** - * See {@link org.apache.lucene.search.Weight#normalize(float,float)} - * - * @param queryNorm the query norm - * @param topLevelBoost the top level boost - */ - public abstract void normalize(float queryNorm, float topLevelBoost); - - /** - * A SpanSimilarity class that calculates similarity statistics based on the term statistics - * of a set of terms. - */ - public static class ScoringSimilarity extends SpanSimilarity { - - private final Similarity similarity; - private final Similarity.SimWeight stats; - - private ScoringSimilarity(SpanQuery query, IndexSearcher searcher, TermStatistics... termStats) throws IOException { - super(query.getField()); - this.similarity = searcher.getSimilarity(); - this.stats = similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(field), termStats); - } - - @Override - public Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException { - return similarity.simScorer(stats, context); - } - - @Override - public String getField() { - return field; - } - - @Override - public float getValueForNormalization() throws IOException { - return stats.getValueForNormalization(); - } - - @Override - public void normalize(float queryNorm, float topLevelBoost) { - stats.normalize(queryNorm, topLevelBoost); - } - - } - - /** - * A SpanSimilarity class that does no scoring - */ - public static class NonScoringSimilarity extends SpanSimilarity { - - private NonScoringSimilarity(String field) { - super(field); - } - - @Override - public Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException { - return null; - } - - @Override - public float getValueForNormalization() throws IOException { - return 0; - } - - @Override - public void normalize(float queryNorm, float topLevelBoost) { - - } - } - - /** - * Build a SpanSimilarity - * @param query the SpanQuery to be run - * @param searcher the searcher - * @param needsScores whether or not scores are required - * @param stats an array of TermStatistics to use in creating the similarity - * @return a SpanSimilarity, or null if there are no statistics to use - * @throws IOException on error - */ - public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, - boolean needsScores, TermStatistics... stats) throws IOException { - return needsScores ? new ScoringSimilarity(query, searcher, stats) : new NonScoringSimilarity(query.getField()); - } - - /** - * Build a SpanSimilarity - * @param query the SpanQuery to be run - * @param searcher the searcher - * @param needsScores whether or not scores are required - * @param weights a set of {@link org.apache.lucene.search.spans.SpanWeight}s to extract terms from - * @return a SpanSimilarity, or null if there are no statistics to use - * @throws IOException on error - */ - public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, boolean needsScores, List weights) throws IOException { - return build(query, searcher, needsScores, weights.toArray(new SpanWeight[weights.size()])); - } - - /** - * Build a SpanSimilarity - * @param query the SpanQuery to run - * @param searcher the searcher - * @param needsScores whether or not scores are required - * @param weights an array of {@link org.apache.lucene.search.spans.SpanWeight}s to extract terms from - * @return a SpanSimilarity, or null if there are no statistics to use - * @throws IOException on error - */ - public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, boolean needsScores, SpanWeight... weights) throws IOException { - - if (!needsScores) - return new NonScoringSimilarity(query.getField()); - - Map contexts = new HashMap<>(); - for (SpanWeight w : weights) { - w.extractTermContexts(contexts); - } - - if (contexts.size() == 0) - return null; - - TermStatistics[] stats = new TermStatistics[contexts.size()]; - int i = 0; - for (Term term : contexts.keySet()) { - stats[i] = searcher.termStatistics(term, contexts.get(term)); - i++; - } - - return new ScoringSimilarity(query, searcher, stats); - } - -} diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index f6647a1e3a4..6a0c9ceedff 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -17,8 +17,10 @@ package org.apache.lucene.search.spans; * limitations under the License. */ +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; @@ -29,6 +31,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.util.Collections; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -37,11 +40,23 @@ import java.util.Set; * This should not be used for terms that are indexed at position Integer.MAX_VALUE. */ public class SpanTermQuery extends SpanQuery { - protected Term term; + + protected final Term term; + protected final TermContext termContext; /** Construct a SpanTermQuery matching the named term's spans. */ public SpanTermQuery(Term term) { this.term = Objects.requireNonNull(term); + this.termContext = null; + } + + /** + * Expert: Construct a SpanTermQuery matching the named term's spans, using + * the provided TermContext + */ + public SpanTermQuery(Term term, TermContext context) { + this.term = Objects.requireNonNull(term); + this.termContext = context; } /** Return the term whose spans are matched. */ @@ -52,18 +67,25 @@ public class SpanTermQuery extends SpanQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { - TermContext context = TermContext.build(searcher.getTopReaderContext(), term); - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, searcher.termStatistics(term, context)); - return new SpanTermWeight(context, similarity, factory); + final TermContext context; + final IndexReaderContext topContext = searcher.getTopReaderContext(); + if (termContext == null || termContext.topReaderContext != topContext) { + context = TermContext.build(topContext, term); + } + else { + context = termContext; + } + return new SpanTermWeight(context, searcher, needsScores ? Collections.singletonMap(term, context) : null, factory); } public class SpanTermWeight extends SpanWeight { final TermContext termContext; - public SpanTermWeight(TermContext termContext, SpanSimilarity similarity, SpanCollectorFactory factory) throws IOException { - super(SpanTermQuery.this, similarity, factory); + public SpanTermWeight(TermContext termContext, IndexSearcher searcher, Map terms, SpanCollectorFactory factory) throws IOException { + super(SpanTermQuery.this, searcher, terms, factory); this.termContext = termContext; + assert termContext != null : "TermContext must not be null"; } @Override @@ -79,8 +101,11 @@ public class SpanTermQuery extends SpanQuery { @Override public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, SpanCollector collector) throws IOException { + assert termContext.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termContext.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); + final TermState state = termContext.get(context.ord); if (state == null) { // term is not present in that reader + assert context.reader().docFreq(term) == 0 : "no termstate found but term exists in reader term=" + term; return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 45d5a9ac02c..8e360edbe59 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -21,9 +21,13 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.Terms; +import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.Bits; @@ -35,20 +39,39 @@ import java.util.Map; */ public abstract class SpanWeight extends Weight { - protected final SpanSimilarity similarity; + protected final Similarity similarity; + protected final Similarity.SimWeight simWeight; protected final SpanCollectorFactory collectorFactory; + protected final String field; /** * Create a new SpanWeight * @param query the parent query - * @param similarity a SpanSimilarity to be used for scoring + * @param searcher the IndexSearcher to query against + * @param termContexts a map of terms to termcontexts for use in building the similarity. May + * be null if scores are not required * @param collectorFactory a SpanCollectorFactory to be used for Span collection * @throws IOException on error */ - public SpanWeight(SpanQuery query, SpanSimilarity similarity, SpanCollectorFactory collectorFactory) throws IOException { + public SpanWeight(SpanQuery query, IndexSearcher searcher, Map termContexts, SpanCollectorFactory collectorFactory) throws IOException { super(query); - this.similarity = similarity; + this.field = query.getField(); + this.similarity = searcher.getSimilarity(); this.collectorFactory = collectorFactory; + this.simWeight = buildSimWeight(query, searcher, termContexts); + } + + private Similarity.SimWeight buildSimWeight(SpanQuery query, IndexSearcher searcher, Map termContexts) throws IOException { + if (termContexts == null || termContexts.size() == 0 || query.getField() == null) + return null; + TermStatistics[] termStats = new TermStatistics[termContexts.size()]; + int i = 0; + for (Term term : termContexts.keySet()) { + termStats[i] = searcher.termStatistics(term, termContexts.get(term)); + i++; + } + CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField()); + return searcher.getSimilarity().computeWeight(query.getBoost(), collectionStats, termStats); } /** @@ -81,27 +104,28 @@ public abstract class SpanWeight extends Weight { @Override public float getValueForNormalization() throws IOException { - return similarity == null ? 1.0f : similarity.getValueForNormalization(); + return simWeight == null ? 1.0f : simWeight.getValueForNormalization(); } @Override public void normalize(float queryNorm, float topLevelBoost) { - if (similarity != null) { - similarity.normalize(queryNorm, topLevelBoost); + if (simWeight != null) { + simWeight.normalize(queryNorm, topLevelBoost); } } @Override public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { - if (similarity == null) { + if (field == null) { return null; } - Terms terms = context.reader().terms(similarity.getField()); + Terms terms = context.reader().terms(field); if (terms != null && terms.hasPositions() == false) { - throw new IllegalStateException("field \"" + similarity.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")"); + throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")"); } Spans spans = getSpans(context, acceptDocs, collectorFactory.newCollector()); - return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(context)); + Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context); + return (spans == null) ? null : new SpanScorer(spans, this, simScorer); } @Override @@ -111,7 +135,7 @@ public abstract class SpanWeight extends Weight { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.sloppyFreq(); - SimScorer docScorer = similarity.simScorer(context); + SimScorer docScorer = similarity.simScorer(simWeight, context); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); return Explanation.match(scoreExplanation.getValue(), diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index 7b13d819297..3b7ef3863ae 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -18,11 +18,14 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; import java.io.IOException; import java.util.ArrayList; +import java.util.Map; /** Keep matches that are contained within another Spans. */ public class SpanWithinQuery extends SpanContainQuery { @@ -52,15 +55,15 @@ public class SpanWithinQuery extends SpanContainQuery { public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { SpanWeight bigWeight = big.createWeight(searcher, false, factory); SpanWeight littleWeight = little.createWeight(searcher, false, factory); - SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, bigWeight, littleWeight); - return new SpanWithinWeight(similarity, factory, bigWeight, littleWeight); + return new SpanWithinWeight(searcher, needsScores ? getTermContexts(bigWeight, littleWeight) : null, + factory, bigWeight, littleWeight); } public class SpanWithinWeight extends SpanContainWeight { - public SpanWithinWeight(SpanSimilarity similarity, SpanCollectorFactory factory, + public SpanWithinWeight(IndexSearcher searcher, Map terms, SpanCollectorFactory factory, SpanWeight bigWeight, SpanWeight littleWeight) throws IOException { - super(similarity, factory, bigWeight, littleWeight); + super(searcher, terms, factory, bigWeight, littleWeight); } /** diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java index 2a30a174299..cbb82647965 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java @@ -44,7 +44,7 @@ public class AssertingSpanQuery extends SpanQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { SpanWeight weight = in.createWeight(searcher, needsScores, factory); - return new AssertingSpanWeight(weight); + return new AssertingSpanWeight(searcher, weight); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java index d685832e583..ec9c15262b3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java @@ -20,6 +20,9 @@ package org.apache.lucene.search.spans; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Scorer; import org.apache.lucene.util.Bits; import java.io.IOException; @@ -38,8 +41,8 @@ public class AssertingSpanWeight extends SpanWeight { * @param in the SpanWeight to wrap * @throws IOException on error */ - public AssertingSpanWeight(SpanWeight in) throws IOException { - super((SpanQuery) in.getQuery(), in.similarity, in.collectorFactory); + public AssertingSpanWeight(IndexSearcher searcher, SpanWeight in) throws IOException { + super((SpanQuery) in.getQuery(), searcher, null, in.collectorFactory); this.in = in; } @@ -60,4 +63,24 @@ public class AssertingSpanWeight extends SpanWeight { public void extractTerms(Set terms) { in.extractTerms(terms); } + + @Override + public float getValueForNormalization() throws IOException { + return in.getValueForNormalization(); + } + + @Override + public void normalize(float queryNorm, float topLevelBoost) { + in.normalize(queryNorm, topLevelBoost); + } + + @Override + public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { + return in.scorer(context, acceptDocs); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + return in.explain(context, doc); + } }