From 2183e67cfd90943025645cb83fc6c555b9679fdc Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 20 May 2015 13:29:10 +0000 Subject: [PATCH] LUCENE-6466: Move SpanQuery.getSpans() and .extractTerms() to SpanWeight git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680565 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../search/payloads/PayloadNearQuery.java | 27 +- .../search/payloads/PayloadSpanCollector.java | 2 +- .../search/payloads/PayloadSpanUtil.java | 16 +- .../search/payloads/PayloadTermQuery.java | 22 +- .../payloads/SpanNearPayloadCheckQuery.java | 2 +- .../payloads/SpanPayloadCheckQuery.java | 2 +- .../search/spans/FieldMaskingSpanQuery.java | 20 +- .../search/spans/SpanCollectorFactory.java | 7 +- .../lucene/search/spans/SpanContainQuery.java | 57 ++- .../search/spans/SpanContainingQuery.java | 116 +++--- .../spans/SpanMultiTermQueryWrapper.java | 22 +- .../lucene/search/spans/SpanNearQuery.java | 72 ++-- .../lucene/search/spans/SpanNotQuery.java | 153 ++++---- .../lucene/search/spans/SpanOrQuery.java | 347 ++++++++++-------- .../search/spans/SpanPositionCheckQuery.java | 60 ++- .../apache/lucene/search/spans/SpanQuery.java | 40 +- .../lucene/search/spans/SpanScorer.java | 12 +- .../lucene/search/spans/SpanSimilarity.java | 202 ++++++++++ .../lucene/search/spans/SpanTermQuery.java | 84 +++-- .../lucene/search/spans/SpanWeight.java | 96 +++-- .../lucene/search/spans/SpanWithinQuery.java | 116 +++--- .../search/spans/JustCompileSearchSpans.java | 14 +- .../search/spans/MultiSpansWrapper.java | 23 +- .../spans/TestFieldMaskingSpanQuery.java | 13 +- .../highlight/WeightedSpanTermExtractor.java | 10 +- .../search/spans/AssertingSpanQuery.java | 33 +- .../search/spans/AssertingSpanWeight.java | 63 ++++ 28 files changed, 992 insertions(+), 642 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/spans/SpanSimilarity.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6b28c695370..3808b24b89b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -204,6 +204,9 @@ API Changes * LUCENE-6484: Removed EliasFanoDocIdSet, which was unused. 
(Paul Elschot via Adrien Grand) +* LUCENE-6466: Moved SpanQuery.getSpans() and .extractTerms() to SpanWeight + (Alan Woodward) + Other * LUCENE-6413: Test runner should report the number of suites completed/ diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index 61c37699b62..7ce87ccbde4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -24,9 +24,11 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.spans.SpanCollectorFactory; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanScorer; +import org.apache.lucene.search.spans.SpanSimilarity; import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.Bits; @@ -34,8 +36,10 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; +import java.util.List; import java.util.Objects; /** @@ -69,8 +73,13 @@ public class PayloadNearQuery extends SpanNearQuery { } @Override - public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new PayloadNearSpanWeight(this, searcher); + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + List subWeights = new ArrayList<>(); + for (SpanQuery q : clauses) { + subWeights.add(q.createWeight(searcher, false, PayloadSpanCollector.FACTORY)); + } + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights); + return new PayloadNearSpanWeight(subWeights, similarity); } @Override @@ -127,20 +136,20 @@ public class PayloadNearQuery extends SpanNearQuery { && function.equals(other.function); } - public class PayloadNearSpanWeight extends SpanWeight { + public class PayloadNearSpanWeight extends SpanNearWeight { - public PayloadNearSpanWeight(SpanQuery query, IndexSearcher searcher) + public PayloadNearSpanWeight(List subWeights, SpanSimilarity similarity) throws IOException { - super(query, searcher, PayloadSpanCollector.FACTORY); + super(subWeights, similarity, PayloadSpanCollector.FACTORY); } @Override public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { - PayloadSpanCollector collector = PayloadSpanCollector.FACTORY.newCollector(); - Spans spans = query.getSpans(context, acceptDocs, termContexts, collector); + PayloadSpanCollector collector = (PayloadSpanCollector) collectorFactory.newCollector(); + Spans spans = super.getSpans(context, acceptDocs, collector); return (spans == null) ? 
null - : new PayloadNearSpanScorer(spans, this, collector, similarity.simScorer(stats, context)); + : new PayloadNearSpanScorer(spans, this, collector, similarity.simScorer(context)); } @Override @@ -151,7 +160,7 @@ public class PayloadNearQuery extends SpanNearQuery { if (newDoc == doc) { float freq = scorer.freq(); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); - SimScorer docScorer = similarity.simScorer(stats, context); + SimScorer docScorer = similarity.simScorer(context); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); Explanation expl = Explanation.match( scoreExplanation.getValue(), diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java index d42ab05ff7d..47a32fb35b7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java @@ -34,7 +34,7 @@ import java.util.Collection; */ public class PayloadSpanCollector implements SpanCollector { - public static final SpanCollectorFactory FACTORY = new SpanCollectorFactory() { + public static final SpanCollectorFactory FACTORY = new SpanCollectorFactory() { @Override public PayloadSpanCollector newCollector() { return new PayloadSpanCollector(); diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java index 6d3dab55d01..bb69b7ee928 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java @@ -21,7 +21,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DisjunctionMaxQuery; @@ -35,16 +34,14 @@ import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.Spans; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.TreeSet; /** * Experimental class to get set of payloads for most standard Lucene queries. 
@@ -179,18 +176,15 @@ public class PayloadSpanUtil { private void getPayloads(Collection payloads, SpanQuery query) throws IOException { - Map termContexts = new HashMap<>(); - TreeSet terms = new TreeSet<>(); + final IndexSearcher searcher = new IndexSearcher(context); searcher.setQueryCache(null); - searcher.createNormalizedWeight(query, false).extractTerms(terms); - for (Term term : terms) { - termContexts.put(term, TermContext.build(context, term)); - } + + SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(query, false); PayloadSpanCollector collector = new PayloadSpanCollector(); for (LeafReaderContext leafReaderContext : context.leaves()) { - final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts, collector); + final Spans spans = w.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), collector); if (spans != null) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index fbf4708ebb4..6a0420d1115 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -20,6 +20,7 @@ package org.apache.lucene.search.payloads; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.similarities.DefaultSimilarity; @@ -27,13 +28,12 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.spans.BufferedSpanCollector; import org.apache.lucene.search.spans.SpanCollector; -import org.apache.lucene.search.spans.SpanCollectorFactory; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanScorer; +import org.apache.lucene.search.spans.SpanSimilarity; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.Spans; -import org.apache.lucene.search.spans.TermSpans; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -70,7 +70,9 @@ public class PayloadTermQuery extends SpanTermQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new PayloadTermWeight(this, searcher); + TermContext context = TermContext.build(searcher.getTopReaderContext(), term); + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, searcher.termStatistics(term, context)); + return new PayloadTermWeight(context, similarity); } private static class PayloadTermCollector implements SpanCollector { @@ -103,20 +105,20 @@ public class PayloadTermQuery extends SpanTermQuery { } } - private class PayloadTermWeight extends SpanWeight { + private class PayloadTermWeight extends SpanTermWeight { - public PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher) + public PayloadTermWeight(TermContext context, SpanSimilarity similarity) throws IOException { - super(query, searcher, SpanCollectorFactory.NO_OP_FACTORY); + super(context, similarity, 
PayloadSpanCollector.FACTORY); } @Override public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { PayloadTermCollector collector = new PayloadTermCollector(); - TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts, collector); + Spans spans = super.getSpans(context, acceptDocs, collector); return (spans == null) ? null - : new PayloadTermSpanScorer(spans, this, collector, similarity.simScorer(stats, context)); + : new PayloadTermSpanScorer(spans, this, collector, similarity.simScorer(context)); } protected class PayloadTermSpanScorer extends SpanScorer { @@ -125,7 +127,7 @@ public class PayloadTermQuery extends SpanTermQuery { protected int payloadsSeen; private final PayloadTermCollector payloadCollector; - public PayloadTermSpanScorer(TermSpans spans, SpanWeight weight, PayloadTermCollector collector, + public PayloadTermSpanScorer(Spans spans, SpanWeight weight, PayloadTermCollector collector, Similarity.SimScorer docScorer) throws IOException { super(spans, weight, docScorer); this.payloadCollector = collector; @@ -206,7 +208,7 @@ public class PayloadTermQuery extends SpanTermQuery { if (newDoc == doc) { float freq = scorer.sloppyFreq(); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); - SimScorer docScorer = similarity.simScorer(stats, context); + SimScorer docScorer = similarity.simScorer(context); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); Explanation expl = Explanation.match( scoreExplanation.getValue(), diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/SpanNearPayloadCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/SpanNearPayloadCheckQuery.java index f50a732f572..a219d1d30ce 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/SpanNearPayloadCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/SpanNearPayloadCheckQuery.java @@ -50,7 +50,7 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new SpanWeight(this, searcher, PayloadSpanCollector.FACTORY); + return createWeight(searcher, needsScores, PayloadSpanCollector.FACTORY); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java index 6152c545aeb..9f270fb962e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java @@ -58,7 +58,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new SpanWeight(this, searcher, PayloadSpanCollector.FACTORY); + return super.createWeight(searcher, needsScores, PayloadSpanCollector.FACTORY); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 780d64badee..85405bf4fbf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -18,18 +18,12 @@ package org.apache.lucene.search.spans; */ import 
org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; -import java.util.Map; import java.util.Objects; -import java.util.Set; /** *

Wrapper to allow {@link SpanQuery} objects participate in composite @@ -94,20 +88,10 @@ public class FieldMaskingSpanQuery extends SpanQuery { // :NOTE: getBoost and setBoost are not proxied to the maskedQuery // ...this is done to be more consistent with things like SpanFirstQuery - - @Override - public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { - return maskedQuery.getSpans(context, acceptDocs, termContexts, collector); - } @Override - public void extractTerms(Set terms) { - maskedQuery.extractTerms(terms); - } - - @Override - public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return maskedQuery.createWeight(searcher, needsScores); + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + return maskedQuery.createWeight(searcher, needsScores, factory); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanCollectorFactory.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanCollectorFactory.java index afdca261d29..274ba7467d2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanCollectorFactory.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanCollectorFactory.java @@ -19,19 +19,18 @@ package org.apache.lucene.search.spans; /** * Interface defining a factory for creating new {@link SpanCollector}s - * @param the SpanCollector type */ -public interface SpanCollectorFactory { +public interface SpanCollectorFactory { /** * @return a new SpanCollector */ - T newCollector(); + SpanCollector newCollector(); /** * Factory for creating NO_OP collectors */ - public static final SpanCollectorFactory NO_OP_FACTORY = new SpanCollectorFactory() { + public static final SpanCollectorFactory NO_OP_FACTORY = new SpanCollectorFactory() { @Override public SpanCollector newCollector() { return SpanCollector.NO_OP; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java index 43197c24c85..198d7fef89d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java @@ -31,6 +31,7 @@ import java.util.Objects; import java.util.Set; abstract class SpanContainQuery extends SpanQuery implements Cloneable { + SpanQuery big; SpanQuery little; @@ -48,26 +49,48 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable { @Override public String getField() { return big.getField(); } - /** Extract terms from both big and little. 
*/ - @Override - public void extractTerms(Set terms) { - big.extractTerms(terms); - little.extractTerms(terms); - } + public abstract class SpanContainWeight extends SpanWeight { - ArrayList prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { - Spans bigSpans = big.getSpans(context, acceptDocs, termContexts, collector); - if (bigSpans == null) { - return null; + final SpanWeight bigWeight; + final SpanWeight littleWeight; + + public SpanContainWeight(SpanSimilarity similarity, SpanCollectorFactory factory, + SpanWeight bigWeight, SpanWeight littleWeight) throws IOException { + super(SpanContainQuery.this, similarity, factory); + this.bigWeight = bigWeight; + this.littleWeight = littleWeight; } - Spans littleSpans = little.getSpans(context, acceptDocs, termContexts, collector); - if (littleSpans == null) { - return null; + + /** + * Extract terms from both big and little. + */ + @Override + public void extractTerms(Set terms) { + bigWeight.extractTerms(terms); + littleWeight.extractTerms(terms); } - ArrayList bigAndLittle = new ArrayList<>(); - bigAndLittle.add(bigSpans); - bigAndLittle.add(littleSpans); - return bigAndLittle; + + ArrayList prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) throws IOException { + Spans bigSpans = bigWeight.getSpans(context, acceptDocs, collector); + if (bigSpans == null) { + return null; + } + Spans littleSpans = littleWeight.getSpans(context, acceptDocs, collector); + if (littleSpans == null) { + return null; + } + ArrayList bigAndLittle = new ArrayList<>(); + bigAndLittle.add(bigSpans); + bigAndLittle.add(littleSpans); + return bigAndLittle; + } + + @Override + public void extractTermContexts(Map contexts) { + bigWeight.extractTermContexts(contexts); + littleWeight.extractTermContexts(contexts); + } + } String toString(String field, String name) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index 9a0b3c1fa8f..88c304a9bc2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -18,13 +18,11 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; import java.io.IOException; import java.util.ArrayList; -import java.util.Map; /** Keep matches that contain another Spans. */ public class SpanContainingQuery extends SpanContainQuery { @@ -48,63 +46,79 @@ public class SpanContainingQuery extends SpanContainQuery { (SpanQuery) big.clone(), (SpanQuery) little.clone()); } - - /** - * Return spans from big that contain at least one spans from little. - * The payload is from the spans of big. 
- */ + @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { - ArrayList containerContained = prepareConjunction(context, acceptDocs, termContexts, collector); - if (containerContained == null) { - return null; + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, false, factory); + SpanWeight littleWeight = little.createWeight(searcher, false, factory); + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, bigWeight, littleWeight); + return new SpanContainingWeight(similarity, factory, bigWeight, littleWeight); + } + + public class SpanContainingWeight extends SpanContainWeight { + + public SpanContainingWeight(SpanSimilarity similarity, SpanCollectorFactory factory, + SpanWeight bigWeight, SpanWeight littleWeight) throws IOException { + super(similarity, factory, bigWeight, littleWeight); } - - Spans big = containerContained.get(0); - Spans little = containerContained.get(1); - return new ContainSpans(big, little, big) { - - @Override - boolean twoPhaseCurrentDocMatches() throws IOException { - oneExhaustedInCurrentDoc = false; - assert littleSpans.startPosition() == -1; - while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) { - while (littleSpans.startPosition() < bigSpans.startPosition()) { - if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) { - oneExhaustedInCurrentDoc = true; - return false; - } - } - if (bigSpans.endPosition() >= littleSpans.endPosition()) { - atFirstInCurrentDoc = true; - return true; - } - } - oneExhaustedInCurrentDoc = true; - return false; + /** + * Return spans from big that contain at least one spans from little. + * The payload is from the spans of big. 
+ */ + @Override + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) throws IOException { + ArrayList containerContained = prepareConjunction(context, acceptDocs, collector); + if (containerContained == null) { + return null; } - @Override - public int nextStartPosition() throws IOException { - if (atFirstInCurrentDoc) { - atFirstInCurrentDoc = false; - return bigSpans.startPosition(); - } - while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) { - while (littleSpans.startPosition() < bigSpans.startPosition()) { - if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) { - oneExhaustedInCurrentDoc = true; - return NO_MORE_POSITIONS; + Spans big = containerContained.get(0); + Spans little = containerContained.get(1); + + return new ContainSpans(big, little, big) { + + @Override + boolean twoPhaseCurrentDocMatches() throws IOException { + oneExhaustedInCurrentDoc = false; + assert littleSpans.startPosition() == -1; + while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) { + while (littleSpans.startPosition() < bigSpans.startPosition()) { + if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) { + oneExhaustedInCurrentDoc = true; + return false; + } + } + if (bigSpans.endPosition() >= littleSpans.endPosition()) { + atFirstInCurrentDoc = true; + return true; } } - if (bigSpans.endPosition() >= littleSpans.endPosition()) { + oneExhaustedInCurrentDoc = true; + return false; + } + + @Override + public int nextStartPosition() throws IOException { + if (atFirstInCurrentDoc) { + atFirstInCurrentDoc = false; return bigSpans.startPosition(); } + while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) { + while (littleSpans.startPosition() < bigSpans.startPosition()) { + if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) { + oneExhaustedInCurrentDoc = true; + return NO_MORE_POSITIONS; + } + } + if (bigSpans.endPosition() >= littleSpans.endPosition()) { + return bigSpans.startPosition(); + } + } + oneExhaustedInCurrentDoc = true; + return NO_MORE_POSITIONS; } - oneExhaustedInCurrentDoc = true; - return NO_MORE_POSITIONS; - } - }; + }; + } } } \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index 44b88e4bae3..b7011eac3b2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -18,20 +18,17 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoringRewrite; import org.apache.lucene.search.TopTermsRewrite; -import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.Map; import java.util.Objects; -import java.util.Set; /** * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, @@ -75,11 +72,6 @@ public class SpanMultiTermQueryWrapper extends SpanQue } } - @Override - protected void extractTerms(Set terms) { - throw new IllegalStateException("Rewrite first"); - } - /** * Expert: returns the rewriteMethod */ @@ -97,17 +89,17 @@ public 
class SpanMultiTermQueryWrapper extends SpanQue public final void setRewriteMethod(SpanRewriteMethod rewriteMethod) { query.setRewriteMethod(rewriteMethod); } - - @Override - public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { - throw new UnsupportedOperationException("Query should have been rewritten"); - } @Override public String getField() { return query.getField(); } - + + @Override + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + throw new IllegalArgumentException("Rewrite first!"); + } + /** Returns the wrapped query */ public Query getWrappedQuery() { return query; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 844b2d9a9b7..a0c431b7ebe 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.Terms; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; @@ -89,13 +90,6 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { @Override public String getField() { return field; } - @Override - public void extractTerms(Set terms) { - for (final SpanQuery clause : clauses) { - clause.extractTerms(terms); - } - } - @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); @@ -118,27 +112,61 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + List subWeights = new ArrayList<>(); + for (SpanQuery q : clauses) { + subWeights.add(q.createWeight(searcher, false, factory)); + } + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights); + return new SpanNearWeight(subWeights, similarity, factory); + } - Terms terms = context.reader().terms(field); - if (terms == null) { - return null; // field does not exist + public class SpanNearWeight extends SpanWeight { + + final List subWeights; + + public SpanNearWeight(List subWeights, SpanSimilarity similarity, SpanCollectorFactory factory) throws IOException { + super(SpanNearQuery.this, similarity, factory); + this.subWeights = subWeights; } - ArrayList subSpans = new ArrayList<>(clauses.size()); - SpanCollector subSpanCollector = inOrder ? collector.bufferedCollector() : collector; - for (SpanQuery seq : clauses) { - Spans subSpan = seq.getSpans(context, acceptDocs, termContexts, subSpanCollector); - if (subSpan != null) { - subSpans.add(subSpan); - } else { - return null; // all required + @Override + public void extractTermContexts(Map contexts) { + for (SpanWeight w : subWeights) { + w.extractTermContexts(contexts); } } - - // all NearSpans require at least two subSpans - return (! inOrder) ? 
new NearSpansUnordered(this, subSpans) : new NearSpansOrdered(this, subSpans, collector); + @Override + public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, SpanCollector collector) throws IOException { + + Terms terms = context.reader().terms(field); + if (terms == null) { + return null; // field does not exist + } + + ArrayList subSpans = new ArrayList<>(clauses.size()); + SpanCollector subSpanCollector = inOrder ? collector.bufferedCollector() : collector; + for (SpanWeight w : subWeights) { + Spans subSpan = w.getSpans(context, acceptDocs, subSpanCollector); + if (subSpan != null) { + subSpans.add(subSpan); + } else { + return null; // all required + } + } + + // all NearSpans require at least two subSpans + return (!inOrder) ? new NearSpansUnordered(SpanNearQuery.this, subSpans) + : new NearSpansOrdered(SpanNearQuery.this, subSpans, collector); + } + + @Override + public void extractTerms(Set terms) { + for (SpanWeight w : subWeights) { + w.extractTerms(terms); + } + } } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index ce1b841176b..73eea4384f5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.util.Bits; @@ -77,9 +78,6 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { @Override public String getField() { return include.getField(); } - @Override - public void extractTerms(Set terms) { include.extractTerms(terms); } - @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); @@ -105,69 +103,100 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { - Spans includeSpans = include.getSpans(context, acceptDocs, termContexts, collector); - if (includeSpans == null) { - return null; + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + SpanWeight includeWeight = include.createWeight(searcher, false, factory); + SpanWeight excludeWeight = exclude.createWeight(searcher, false, factory); + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, includeWeight); + return new SpanNotWeight(similarity, factory, includeWeight, excludeWeight); + } + + public class SpanNotWeight extends SpanWeight { + + final SpanWeight includeWeight; + final SpanWeight excludeWeight; + + public SpanNotWeight(SpanSimilarity similarity, SpanCollectorFactory factory, + SpanWeight includeWeight, SpanWeight excludeWeight) throws IOException { + super(SpanNotQuery.this, similarity, factory); + this.includeWeight = includeWeight; + this.excludeWeight = excludeWeight; } - Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts, collector); - if (excludeSpans == null) { - return includeSpans; + @Override + public void extractTermContexts(Map contexts) { + 
includeWeight.extractTermContexts(contexts); } - - TwoPhaseIterator excludeTwoPhase = excludeSpans.asTwoPhaseIterator(); - DocIdSetIterator excludeApproximation = excludeTwoPhase == null ? null : excludeTwoPhase.approximation(); - - return new FilterSpans(includeSpans) { - // last document we have checked matches() against for the exclusion, and failed - // when using approximations, so we don't call it again, and pass thru all inclusions. - int lastApproxDoc = -1; - boolean lastApproxResult = false; - - @Override - protected AcceptStatus accept(Spans candidate) throws IOException { - // TODO: this logic is ugly and sneaky, can we clean it up? - int doc = candidate.docID(); - if (doc > excludeSpans.docID()) { - // catch up 'exclude' to the current doc - if (excludeTwoPhase != null) { - if (excludeApproximation.advance(doc) == doc) { - lastApproxDoc = doc; - lastApproxResult = excludeTwoPhase.matches(); - } - } else { - excludeSpans.advance(doc); - } - } else if (excludeTwoPhase != null && doc == excludeSpans.docID() && doc != lastApproxDoc) { - // excludeSpans already sitting on our candidate doc, but matches not called yet. - lastApproxDoc = doc; - lastApproxResult = excludeTwoPhase.matches(); - } - - if (doc != excludeSpans.docID() || (doc == lastApproxDoc && lastApproxResult == false)) { - return AcceptStatus.YES; - } - - if (excludeSpans.startPosition() == -1) { // init exclude start position if needed - excludeSpans.nextStartPosition(); - } - - while (excludeSpans.endPosition() <= candidate.startPosition() - pre) { - // exclude end position is before a possible exclusion - if (excludeSpans.nextStartPosition() == NO_MORE_POSITIONS) { - return AcceptStatus.YES; // no more exclude at current doc. - } - } - - // exclude end position far enough in current doc, check start position: - if (candidate.endPosition() + post <= excludeSpans.startPosition()) { - return AcceptStatus.YES; - } else { - return AcceptStatus.NO; - } + + @Override + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) throws IOException { + Spans includeSpans = includeWeight.getSpans(context, acceptDocs, collector); + if (includeSpans == null) { + return null; } - }; + + Spans excludeSpans = excludeWeight.getSpans(context, acceptDocs, collector); + if (excludeSpans == null) { + return includeSpans; + } + + TwoPhaseIterator excludeTwoPhase = excludeSpans.asTwoPhaseIterator(); + DocIdSetIterator excludeApproximation = excludeTwoPhase == null ? null : excludeTwoPhase.approximation(); + + return new FilterSpans(includeSpans) { + // last document we have checked matches() against for the exclusion, and failed + // when using approximations, so we don't call it again, and pass thru all inclusions. + int lastApproxDoc = -1; + boolean lastApproxResult = false; + + @Override + protected AcceptStatus accept(Spans candidate) throws IOException { + // TODO: this logic is ugly and sneaky, can we clean it up? + int doc = candidate.docID(); + if (doc > excludeSpans.docID()) { + // catch up 'exclude' to the current doc + if (excludeTwoPhase != null) { + if (excludeApproximation.advance(doc) == doc) { + lastApproxDoc = doc; + lastApproxResult = excludeTwoPhase.matches(); + } + } else { + excludeSpans.advance(doc); + } + } else if (excludeTwoPhase != null && doc == excludeSpans.docID() && doc != lastApproxDoc) { + // excludeSpans already sitting on our candidate doc, but matches not called yet. 
+ lastApproxDoc = doc; + lastApproxResult = excludeTwoPhase.matches(); + } + + if (doc != excludeSpans.docID() || (doc == lastApproxDoc && lastApproxResult == false)) { + return AcceptStatus.YES; + } + + if (excludeSpans.startPosition() == -1) { // init exclude start position if needed + excludeSpans.nextStartPosition(); + } + + while (excludeSpans.endPosition() <= candidate.startPosition() - pre) { + // exclude end position is before a possible exclusion + if (excludeSpans.nextStartPosition() == NO_MORE_POSITIONS) { + return AcceptStatus.YES; // no more exclude at current doc. + } + } + + // exclude end position far enough in current doc, check start position: + if (candidate.endPosition() + post <= excludeSpans.startPosition()) { + return AcceptStatus.YES; + } else { + return AcceptStatus.NO; + } + } + }; + } + + @Override + public void extractTerms(Set terms) { + includeWeight.extractTerms(terms); + } } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index d84e299e6d0..85a890996d7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.TermContext; import org.apache.lucene.search.DisiPriorityQueue; import org.apache.lucene.search.DisiWrapper; import org.apache.lucene.search.DisjunctionDISIApproximation; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.util.Bits; @@ -71,13 +72,6 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { @Override public String getField() { return field; } - @Override - public void extractTerms(Set terms) { - for(final SpanQuery clause: clauses) { - clause.extractTerms(terms); - } - } - @Override public SpanOrQuery clone() { int sz = clauses.size(); @@ -143,190 +137,223 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { return h; } - @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) - throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + List subWeights = new ArrayList<>(clauses.size()); + for (SpanQuery q : clauses) { + subWeights.add(q.createWeight(searcher, false, factory)); + } + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights); + return new SpanOrWeight(similarity, factory, subWeights); + } - ArrayList subSpans = new ArrayList<>(clauses.size()); + public class SpanOrWeight extends SpanWeight { - for (SpanQuery sq : clauses) { - Spans spans = sq.getSpans(context, acceptDocs, termContexts, collector); - if (spans != null) { - subSpans.add(spans); + final List subWeights; + + public SpanOrWeight(SpanSimilarity similarity, SpanCollectorFactory factory, List subWeights) throws IOException { + super(SpanOrQuery.this, similarity, factory); + this.subWeights = subWeights; + } + + @Override + public void extractTerms(Set terms) { + for (final SpanWeight w: subWeights) { + w.extractTerms(terms); } } - if (subSpans.size() == 0) { - return null; - } else if (subSpans.size() == 1) { - return subSpans.get(0); + @Override + public void extractTermContexts(Map contexts) { + for (SpanWeight w : subWeights) { + w.extractTermContexts(contexts); + 
} } - DisiPriorityQueue byDocQueue = new DisiPriorityQueue<>(subSpans.size()); - for (Spans spans : subSpans) { - byDocQueue.add(new DisiWrapper<>(spans)); - } + @Override + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) + throws IOException { - SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1 + ArrayList subSpans = new ArrayList<>(clauses.size()); - return new Spans() { - Spans topPositionSpans = null; - - @Override - public int nextDoc() throws IOException { - topPositionSpans = null; - DisiWrapper topDocSpans = byDocQueue.top(); - int currentDoc = topDocSpans.doc; - do { - topDocSpans.doc = topDocSpans.iterator.nextDoc(); - topDocSpans = byDocQueue.updateTop(); - } while (topDocSpans.doc == currentDoc); - return topDocSpans.doc; + for (SpanWeight w : subWeights) { + Spans spans = w.getSpans(context, acceptDocs, collector); + if (spans != null) { + subSpans.add(spans); + } } - @Override - public int advance(int target) throws IOException { - topPositionSpans = null; - DisiWrapper topDocSpans = byDocQueue.top(); - do { - topDocSpans.doc = topDocSpans.iterator.advance(target); - topDocSpans = byDocQueue.updateTop(); - } while (topDocSpans.doc < target); - return topDocSpans.doc; + if (subSpans.size() == 0) { + return null; + } else if (subSpans.size() == 1) { + return subSpans.get(0); } - @Override - public int docID() { - DisiWrapper topDocSpans = byDocQueue.top(); - return topDocSpans.doc; + DisiPriorityQueue byDocQueue = new DisiPriorityQueue<>(subSpans.size()); + for (Spans spans : subSpans) { + byDocQueue.add(new DisiWrapper<>(spans)); } - @Override - public TwoPhaseIterator asTwoPhaseIterator() { - boolean hasApproximation = false; - for (DisiWrapper w : byDocQueue) { - if (w.twoPhaseView != null) { - hasApproximation = true; - break; - } + SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1 + + return new Spans() { + Spans topPositionSpans = null; + + @Override + public int nextDoc() throws IOException { + topPositionSpans = null; + DisiWrapper topDocSpans = byDocQueue.top(); + int currentDoc = topDocSpans.doc; + do { + topDocSpans.doc = topDocSpans.iterator.nextDoc(); + topDocSpans = byDocQueue.updateTop(); + } while (topDocSpans.doc == currentDoc); + return topDocSpans.doc; } - if (! 
hasApproximation) { // none of the sub spans supports approximations - return null; + @Override + public int advance(int target) throws IOException { + topPositionSpans = null; + DisiWrapper topDocSpans = byDocQueue.top(); + do { + topDocSpans.doc = topDocSpans.iterator.advance(target); + topDocSpans = byDocQueue.updateTop(); + } while (topDocSpans.doc < target); + return topDocSpans.doc; } - return new TwoPhaseIterator(new DisjunctionDISIApproximation(byDocQueue)) { - @Override - public boolean matches() throws IOException { - return twoPhaseCurrentDocMatches(); - } - }; - } - - int lastDocTwoPhaseMatched = -1; - - boolean twoPhaseCurrentDocMatches() throws IOException { - DisiWrapper listAtCurrentDoc = byDocQueue.topList(); - // remove the head of the list as long as it does not match - final int currentDoc = listAtCurrentDoc.doc; - while (listAtCurrentDoc.twoPhaseView != null) { - if (listAtCurrentDoc.twoPhaseView.matches()) { - // use this spans for positions at current doc: - listAtCurrentDoc.lastApproxMatchDoc = currentDoc; - break; - } - // do not use this spans for positions at current doc: - listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc; - listAtCurrentDoc = listAtCurrentDoc.next; - if (listAtCurrentDoc == null) { - return false; - } + @Override + public int docID() { + DisiWrapper topDocSpans = byDocQueue.top(); + return topDocSpans.doc; } - lastDocTwoPhaseMatched = currentDoc; - topPositionSpans = null; - return true; - } - void fillPositionQueue() throws IOException { // called at first nextStartPosition - assert byPositionQueue.size() == 0; - // add all matching Spans at current doc to byPositionQueue - DisiWrapper listAtCurrentDoc = byDocQueue.topList(); - while (listAtCurrentDoc != null) { - Spans spansAtDoc = listAtCurrentDoc.iterator; - if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDisiApproximation - if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation - if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false - spansAtDoc = null; - } else { - if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) { - if (! listAtCurrentDoc.twoPhaseView.matches()) { - spansAtDoc = null; - } - } - } + @Override + public TwoPhaseIterator asTwoPhaseIterator() { + boolean hasApproximation = false; + for (DisiWrapper w : byDocQueue) { + if (w.twoPhaseView != null) { + hasApproximation = true; + break; } } - if (spansAtDoc != null) { - assert spansAtDoc.docID() == listAtCurrentDoc.doc; - assert spansAtDoc.startPosition() == -1; - spansAtDoc.nextStartPosition(); - assert spansAtDoc.startPosition() != NO_MORE_POSITIONS; - byPositionQueue.add(spansAtDoc); + if (!hasApproximation) { // none of the sub spans supports approximations + return null; } - listAtCurrentDoc = listAtCurrentDoc.next; + + return new TwoPhaseIterator(new DisjunctionDISIApproximation(byDocQueue)) { + @Override + public boolean matches() throws IOException { + return twoPhaseCurrentDocMatches(); + } + }; } - assert byPositionQueue.size() > 0; - } - - @Override - public int nextStartPosition() throws IOException { - if (topPositionSpans == null) { - byPositionQueue.clear(); - fillPositionQueue(); // fills byPositionQueue at first position - topPositionSpans = byPositionQueue.top(); - } else { - topPositionSpans.nextStartPosition(); - topPositionSpans = byPositionQueue.updateTop(); - } - return topPositionSpans.startPosition(); - } - @Override - public int startPosition() { - return topPositionSpans == null ? 
-1 : topPositionSpans.startPosition(); - } + int lastDocTwoPhaseMatched = -1; - @Override - public int endPosition() { - return topPositionSpans == null ? -1 : topPositionSpans.endPosition(); - } - - @Override - public void collect(SpanCollector collector) throws IOException { - if (topPositionSpans != null) - topPositionSpans.collect(collector); - } - - @Override - public String toString() { - return "spanOr("+SpanOrQuery.this+")@"+docID()+": "+startPosition()+" - "+endPosition(); - } - - long cost = -1; - - @Override - public long cost() { - if (cost == -1) { - cost = 0; - for (Spans spans : subSpans) { - cost += spans.cost(); + boolean twoPhaseCurrentDocMatches() throws IOException { + DisiWrapper listAtCurrentDoc = byDocQueue.topList(); + // remove the head of the list as long as it does not match + final int currentDoc = listAtCurrentDoc.doc; + while (listAtCurrentDoc.twoPhaseView != null) { + if (listAtCurrentDoc.twoPhaseView.matches()) { + // use this spans for positions at current doc: + listAtCurrentDoc.lastApproxMatchDoc = currentDoc; + break; + } + // do not use this spans for positions at current doc: + listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc; + listAtCurrentDoc = listAtCurrentDoc.next; + if (listAtCurrentDoc == null) { + return false; + } } + lastDocTwoPhaseMatched = currentDoc; + topPositionSpans = null; + return true; } - return cost; - } - }; + + void fillPositionQueue() throws IOException { // called at first nextStartPosition + assert byPositionQueue.size() == 0; + // add all matching Spans at current doc to byPositionQueue + DisiWrapper listAtCurrentDoc = byDocQueue.topList(); + while (listAtCurrentDoc != null) { + Spans spansAtDoc = listAtCurrentDoc.iterator; + if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDisiApproximation + if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation + if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false + spansAtDoc = null; + } else { + if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) { + if (!listAtCurrentDoc.twoPhaseView.matches()) { + spansAtDoc = null; + } + } + } + } + } + + if (spansAtDoc != null) { + assert spansAtDoc.docID() == listAtCurrentDoc.doc; + assert spansAtDoc.startPosition() == -1; + spansAtDoc.nextStartPosition(); + assert spansAtDoc.startPosition() != NO_MORE_POSITIONS; + byPositionQueue.add(spansAtDoc); + } + listAtCurrentDoc = listAtCurrentDoc.next; + } + assert byPositionQueue.size() > 0; + } + + @Override + public int nextStartPosition() throws IOException { + if (topPositionSpans == null) { + byPositionQueue.clear(); + fillPositionQueue(); // fills byPositionQueue at first position + topPositionSpans = byPositionQueue.top(); + } else { + topPositionSpans.nextStartPosition(); + topPositionSpans = byPositionQueue.updateTop(); + } + return topPositionSpans.startPosition(); + } + + @Override + public int startPosition() { + return topPositionSpans == null ? -1 : topPositionSpans.startPosition(); + } + + @Override + public int endPosition() { + return topPositionSpans == null ? 
-1 : topPositionSpans.endPosition(); + } + + @Override + public void collect(SpanCollector collector) throws IOException { + if (topPositionSpans != null) + topPositionSpans.collect(collector); + } + + @Override + public String toString() { + return "spanOr(" + SpanOrQuery.this + ")@" + docID() + ": " + startPosition() + " - " + endPosition(); + } + + long cost = -1; + + @Override + public long cost() { + if (cost == -1) { + cost = 0; + for (Spans spans : subSpans) { + cost += spans.cost(); + } + } + return cost; + } + }; + } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index d361d488733..a848bb38254 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.spans.FilterSpans.AcceptStatus; import org.apache.lucene.util.Bits; @@ -47,18 +48,9 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea * */ public SpanQuery getMatch() { return match; } - - @Override public String getField() { return match.getField(); } - - - @Override - public void extractTerms(Set terms) { - match.extractTerms(terms); - } - /** * Implementing classes are required to return whether the current position is a match for the passed in * "match" {@link SpanQuery}. @@ -66,7 +58,6 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea * This is only called if the underlying last {@link Spans#nextStartPosition()} for the * match indicated a valid start position. * - * * @param spans The {@link Spans} instance, positioned at the spot to check * @param collector the {@link SpanCollector} associated with the Spans * @@ -78,14 +69,47 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea protected abstract AcceptStatus acceptPosition(Spans spans, SpanCollector collector) throws IOException; @Override - public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { - Spans matchSpans = match.getSpans(context, acceptDocs, termContexts, collector); - return (matchSpans == null) ? 
null : new FilterSpans(matchSpans) { - @Override - protected AcceptStatus accept(Spans candidate) throws IOException { - return acceptPosition(candidate, collector); - } - }; + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + SpanWeight matchWeight = match.createWeight(searcher, false, factory); + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, matchWeight); + return new SpanPositionCheckWeight(matchWeight, similarity, factory); + } + + public class SpanPositionCheckWeight extends SpanWeight { + + final SpanWeight matchWeight; + + public SpanPositionCheckWeight(SpanWeight matchWeight, SpanSimilarity similarity, + SpanCollectorFactory collectorFactory) throws IOException { + super(SpanPositionCheckQuery.this, similarity, collectorFactory); + this.matchWeight = matchWeight; + } + + public SpanPositionCheckWeight(SpanWeight matchWeight, SpanSimilarity similarity) throws IOException { + this(matchWeight, similarity, SpanCollectorFactory.NO_OP_FACTORY); + } + + @Override + public void extractTerms(Set terms) { + matchWeight.extractTerms(terms); + } + + @Override + public void extractTermContexts(Map contexts) { + matchWeight.extractTermContexts(contexts); + } + + @Override + public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, SpanCollector collector) throws IOException { + Spans matchSpans = matchWeight.getSpans(context, acceptDocs, collector); + return (matchSpans == null) ? null : new FilterSpans(matchSpans) { + @Override + protected AcceptStatus accept(Spans candidate) throws IOException { + return acceptPosition(candidate, collector); + } + }; + } + } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index d3cc54050f5..fea6d98696a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -17,47 +17,33 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; -import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.Map; -import java.util.Set; /** Base class for span-based queries. */ public abstract class SpanQuery extends Query { - /** Expert: Returns the matches for this query in an index. - * Used internally to search for spans. - * This may return null to indicate that the SpanQuery has no results. - */ - public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException; - - /** - * Extract terms from these spans. - * @lucene.internal - * @see Weight#extractTerms - */ - protected abstract void extractTerms(Set terms); /** * Returns the name of the field matched by this query. - *

- * Note that this may return null if the query matches no terms. */ public abstract String getField(); + /** + * Create a SpanWeight for this query + * @param searcher the IndexSearcher to be searched across + * @param needsScores if the query needs scores + * @param collectorFactory a SpanCollectorFactory to use in collecting postings data + * @return a SpanWeight + * @throws IOException on error + */ + public abstract SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, + SpanCollectorFactory collectorFactory) throws IOException; + @Override - public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new SpanWeight(this, searcher, getSpanCollectorFactory()); + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return createWeight(searcher, needsScores, SpanCollectorFactory.NO_OP_FACTORY); } - - protected SpanCollectorFactory getSpanCollectorFactory() { - return SpanCollectorFactory.NO_OP_FACTORY; - } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 7b8dea3c724..b60bf0d1d90 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -17,13 +17,13 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.Objects; - import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.similarities.Similarity; +import java.io.IOException; +import java.util.Objects; + /** * Public for extension only. */ @@ -42,7 +42,7 @@ public class SpanScorer extends Scorer { protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException { super(weight); - this.docScorer = Objects.requireNonNull(docScorer); + this.docScorer = docScorer; this.spans = Objects.requireNonNull(spans); } @@ -91,6 +91,10 @@ public class SpanScorer extends Scorer { // assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos; assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos; numMatches++; + if (docScorer == null) { // scores not required, break out here + freq = 1; + return; + } int matchLength = endPos - startPos; freq += docScorer.computeSlopFactor(matchLength); prevStartPos = startPos; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanSimilarity.java new file mode 100644 index 00000000000..517f7aba34f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanSimilarity.java @@ -0,0 +1,202 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.similarities.Similarity; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Encapsulates similarity statistics required for SpanScorers + */ +public abstract class SpanSimilarity { + + /** + * The field term statistics are taken from + */ + protected final String field; + + /** + * Create a new SpanSimilarity + * @param field the similarity field for term statistics + */ + protected SpanSimilarity(String field) { + this.field = field; + } + + /** + * Create a SimScorer for this SpanSimilarity's statistics + * @param context the LeafReaderContext to calculate the scorer for + * @return a SimScorer, or null if no scoring is required + * @throws IOException on error + */ + public abstract Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException; + + /** + * @return the field for term statistics + */ + public String getField() { + return field; + } + + /** + * See {@link org.apache.lucene.search.Weight#getValueForNormalization()} + * + * @return the value for normalization + * @throws IOException on error + */ + public abstract float getValueForNormalization() throws IOException; + + /** + * See {@link org.apache.lucene.search.Weight#normalize(float,float)} + * + * @param queryNorm the query norm + * @param topLevelBoost the top level boost + */ + public abstract void normalize(float queryNorm, float topLevelBoost); + + /** + * A SpanSimilarity class that calculates similarity statistics based on the term statistics + * of a set of terms. + */ + public static class ScoringSimilarity extends SpanSimilarity { + + private final Similarity similarity; + private final Similarity.SimWeight stats; + + private ScoringSimilarity(SpanQuery query, IndexSearcher searcher, TermStatistics... 
termStats) throws IOException { + super(query.getField()); + this.similarity = searcher.getSimilarity(); + this.stats = similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(field), termStats); + } + + @Override + public Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException { + return similarity.simScorer(stats, context); + } + + @Override + public String getField() { + return field; + } + + @Override + public float getValueForNormalization() throws IOException { + return stats.getValueForNormalization(); + } + + @Override + public void normalize(float queryNorm, float topLevelBoost) { + stats.normalize(queryNorm, topLevelBoost); + } + + } + + /** + * A SpanSimilarity class that does no scoring + */ + public static class NonScoringSimilarity extends SpanSimilarity { + + private NonScoringSimilarity(String field) { + super(field); + } + + @Override + public Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException { + return null; + } + + @Override + public float getValueForNormalization() throws IOException { + return 0; + } + + @Override + public void normalize(float queryNorm, float topLevelBoost) { + + } + } + + /** + * Build a SpanSimilarity + * @param query the SpanQuery to be run + * @param searcher the searcher + * @param needsScores whether or not scores are required + * @param stats an array of TermStatistics to use in creating the similarity + * @return a SpanSimilarity, or null if there are no statistics to use + * @throws IOException on error + */ + public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, + boolean needsScores, TermStatistics... stats) throws IOException { + return needsScores ? new ScoringSimilarity(query, searcher, stats) : new NonScoringSimilarity(query.getField()); + } + + /** + * Build a SpanSimilarity + * @param query the SpanQuery to be run + * @param searcher the searcher + * @param needsScores whether or not scores are required + * @param weights a set of {@link org.apache.lucene.search.spans.SpanWeight}s to extract terms from + * @return a SpanSimilarity, or null if there are no statistics to use + * @throws IOException on error + */ + public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, boolean needsScores, List weights) throws IOException { + return build(query, searcher, needsScores, weights.toArray(new SpanWeight[weights.size()])); + } + + /** + * Build a SpanSimilarity + * @param query the SpanQuery to run + * @param searcher the searcher + * @param needsScores whether or not scores are required + * @param weights an array of {@link org.apache.lucene.search.spans.SpanWeight}s to extract terms from + * @return a SpanSimilarity, or null if there are no statistics to use + * @throws IOException on error + */ + public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, boolean needsScores, SpanWeight... 
weights) throws IOException { + + if (!needsScores) + return new NonScoringSimilarity(query.getField()); + + Map contexts = new HashMap<>(); + for (SpanWeight w : weights) { + w.extractTermContexts(contexts); + } + + if (contexts.size() == 0) + return null; + + TermStatistics[] stats = new TermStatistics[contexts.size()]; + int i = 0; + for (Term term : contexts.keySet()) { + stats[i] = searcher.termStatistics(term, contexts.get(term)); + i++; + } + + return new ScoringSimilarity(query, searcher, stats); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 3ac2f2d62c6..f6647a1e3a4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; @@ -50,8 +51,51 @@ public class SpanTermQuery extends SpanQuery { public String getField() { return term.field(); } @Override - public void extractTerms(Set terms) { - terms.add(term); + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + TermContext context = TermContext.build(searcher.getTopReaderContext(), term); + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, searcher.termStatistics(term, context)); + return new SpanTermWeight(context, similarity, factory); + } + + public class SpanTermWeight extends SpanWeight { + + final TermContext termContext; + + public SpanTermWeight(TermContext termContext, SpanSimilarity similarity, SpanCollectorFactory factory) throws IOException { + super(SpanTermQuery.this, similarity, factory); + this.termContext = termContext; + } + + @Override + public void extractTerms(Set terms) { + terms.add(term); + } + + @Override + public void extractTermContexts(Map contexts) { + contexts.put(term, termContext); + } + + @Override + public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, SpanCollector collector) throws IOException { + + final TermState state = termContext.get(context.ord); + if (state == null) { // term is not present in that reader + return null; + } + + final Terms terms = context.reader().terms(term.field()); + if (terms == null) + return null; + if (terms.hasPositions() == false) + throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")"); + + final TermsEnum termsEnum = terms.iterator(); + termsEnum.seekExact(term.bytes(), state); + + final PostingsEnum postings = termsEnum.postings(acceptDocs, null, collector.requiredPostings()); + return new TermSpans(postings, term); + } } @Override @@ -82,40 +126,4 @@ public class SpanTermQuery extends SpanQuery { return term.equals(other.term); } - @Override - public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { - TermContext termContext = termContexts.get(term); - final TermState state; - if (termContext == null) { - // this happens with span-not query, as it doesn't include the NOT side in extractTerms() - // so we seek to the term now in this 
segment..., this sucks because it's ugly mostly! - final Terms terms = context.reader().terms(term.field()); - if (terms != null) { - if (terms.hasPositions() == false) { - throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")"); - } - - final TermsEnum termsEnum = terms.iterator(); - if (termsEnum.seekExact(term.bytes())) { - state = termsEnum.termState(); - } else { - state = null; - } - } else { - state = null; - } - } else { - state = termContext.get(context.ord); - } - - if (state == null) { // term is not present in that reader - return null; - } - - final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(); - termsEnum.seekExact(term.bytes(), state); - - final PostingsEnum postings = termsEnum.postings(acceptDocs, null, collector.requiredPostings()); - return new TermSpans(postings, term); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index e98b423cda6..45d5a9ac02c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -17,97 +17,91 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.Terms; import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.Weight; -import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.HashMap; import java.util.Map; -import java.util.Set; -import java.util.TreeSet; /** * Expert-only. 
Public for use by other weight implementations */ -public class SpanWeight extends Weight { - protected final Similarity similarity; - protected final Map termContexts; - protected final SpanQuery query; - protected final SpanCollectorFactory collectorFactory; - protected Similarity.SimWeight stats; +public abstract class SpanWeight extends Weight { - public SpanWeight(SpanQuery query, IndexSearcher searcher, SpanCollectorFactory collectorFactory) throws IOException { + protected final SpanSimilarity similarity; + protected final SpanCollectorFactory collectorFactory; + + /** + * Create a new SpanWeight + * @param query the parent query + * @param similarity a SpanSimilarity to be used for scoring + * @param collectorFactory a SpanCollectorFactory to be used for Span collection + * @throws IOException on error + */ + public SpanWeight(SpanQuery query, SpanSimilarity similarity, SpanCollectorFactory collectorFactory) throws IOException { super(query); - this.similarity = searcher.getSimilarity(); - this.query = query; + this.similarity = similarity; this.collectorFactory = collectorFactory; - - termContexts = new HashMap<>(); - TreeSet terms = new TreeSet<>(); - query.extractTerms(terms); - final IndexReaderContext context = searcher.getTopReaderContext(); - final TermStatistics termStats[] = new TermStatistics[terms.size()]; - int i = 0; - for (Term term : terms) { - TermContext state = TermContext.build(context, term); - termStats[i] = searcher.termStatistics(term, state); - termContexts.put(term, state); - i++; - } - final String field = query.getField(); - if (field != null) { - stats = similarity.computeWeight(query.getBoost(), - searcher.collectionStatistics(query.getField()), - termStats); - } } /** - * @return the SpanCollectorFactory associated with this SpanWeight + * Collect all TermContexts used by this Weight + * @param contexts a map to add the TermContexts to */ - public SpanCollectorFactory getSpanCollectorFactory() { - return collectorFactory; - } + public abstract void extractTermContexts(Map contexts); - @Override - public void extractTerms(Set terms) { - query.extractTerms(terms); + /** + * Expert: Return a Spans object iterating over matches from this Weight + * @param ctx a LeafReaderContext for this Spans + * @param acceptDocs a bitset of documents to check + * @param collector a SpanCollector to use for postings data collection + * @return a Spans + * @throws IOException on error + */ + public abstract Spans getSpans(LeafReaderContext ctx, Bits acceptDocs, SpanCollector collector) throws IOException; + + /** + * Expert: Return a Spans object iterating over matches from this Weight, without + * collecting any postings data. + * @param ctx a LeafReaderContext for this Spans + * @param acceptDocs a bitset of documents to check + * @return a Spans + * @throws IOException on error + */ + public final Spans getSpans(LeafReaderContext ctx, Bits acceptDocs) throws IOException { + return getSpans(ctx, acceptDocs, collectorFactory.newCollector()); } @Override public float getValueForNormalization() throws IOException { - return stats == null ? 1.0f : stats.getValueForNormalization(); + return similarity == null ? 
1.0f : similarity.getValueForNormalization(); } @Override public void normalize(float queryNorm, float topLevelBoost) { - if (stats != null) { - stats.normalize(queryNorm, topLevelBoost); + if (similarity != null) { + similarity.normalize(queryNorm, topLevelBoost); } } @Override public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { - if (stats == null) { + if (similarity == null) { return null; } - Terms terms = context.reader().terms(query.getField()); + Terms terms = context.reader().terms(similarity.getField()); if (terms != null && terms.hasPositions() == false) { - throw new IllegalStateException("field \"" + query.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + query + ")"); + throw new IllegalStateException("field \"" + similarity.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")"); } - Spans spans = query.getSpans(context, acceptDocs, termContexts, collectorFactory.newCollector()); - return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context)); + Spans spans = getSpans(context, acceptDocs, collectorFactory.newCollector()); + return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(context)); } @Override @@ -117,7 +111,7 @@ public class SpanWeight extends Weight { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.sloppyFreq(); - SimScorer docScorer = similarity.simScorer(stats, context); + SimScorer docScorer = similarity.simScorer(context); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); return Explanation.match(scoreExplanation.getValue(), diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index eb1a2af0acc..7b13d819297 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -18,16 +18,15 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; import java.io.IOException; import java.util.ArrayList; -import java.util.Map; /** Keep matches that are contained within another Spans. */ public class SpanWithinQuery extends SpanContainQuery { + /** Construct a SpanWithinQuery matching spans from little * that are inside of big. * This query has the boost of little. @@ -49,62 +48,79 @@ public class SpanWithinQuery extends SpanContainQuery { (SpanQuery) little.clone()); } - /** - * Return spans from little that are contained in a spans from big. - * The payload is from the spans of little. 
- */ @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { - ArrayList containerContained = prepareConjunction(context, acceptDocs, termContexts, collector); - if (containerContained == null) { - return null; + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, false, factory); + SpanWeight littleWeight = little.createWeight(searcher, false, factory); + SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, bigWeight, littleWeight); + return new SpanWithinWeight(similarity, factory, bigWeight, littleWeight); + } + + public class SpanWithinWeight extends SpanContainWeight { + + public SpanWithinWeight(SpanSimilarity similarity, SpanCollectorFactory factory, + SpanWeight bigWeight, SpanWeight littleWeight) throws IOException { + super(similarity, factory, bigWeight, littleWeight); } - Spans big = containerContained.get(0); - Spans little = containerContained.get(1); - - return new ContainSpans(big, little, little) { - - @Override - boolean twoPhaseCurrentDocMatches() throws IOException { - oneExhaustedInCurrentDoc = false; - assert littleSpans.startPosition() == -1; - while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) { - while (bigSpans.endPosition() < littleSpans.endPosition()) { - if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) { - oneExhaustedInCurrentDoc = true; - return false; - } - } - if (bigSpans.startPosition() <= littleSpans.startPosition()) { - atFirstInCurrentDoc = true; - return true; - } - } - oneExhaustedInCurrentDoc = true; - return false; + /** + * Return spans from little that are contained in a spans from big. + * The payload is from the spans of little. 
+ */ + @Override + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) throws IOException { + ArrayList containerContained = prepareConjunction(context, acceptDocs, collector); + if (containerContained == null) { + return null; } - @Override - public int nextStartPosition() throws IOException { - if (atFirstInCurrentDoc) { - atFirstInCurrentDoc = false; - return littleSpans.startPosition(); - } - while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) { - while (bigSpans.endPosition() < littleSpans.endPosition()) { - if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) { - oneExhaustedInCurrentDoc = true; - return NO_MORE_POSITIONS; + Spans big = containerContained.get(0); + Spans little = containerContained.get(1); + + return new ContainSpans(big, little, little) { + + @Override + boolean twoPhaseCurrentDocMatches() throws IOException { + oneExhaustedInCurrentDoc = false; + assert littleSpans.startPosition() == -1; + while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) { + while (bigSpans.endPosition() < littleSpans.endPosition()) { + if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) { + oneExhaustedInCurrentDoc = true; + return false; + } + } + if (bigSpans.startPosition() <= littleSpans.startPosition()) { + atFirstInCurrentDoc = true; + return true; } } - if (bigSpans.startPosition() <= littleSpans.startPosition()) { + oneExhaustedInCurrentDoc = true; + return false; + } + + @Override + public int nextStartPosition() throws IOException { + if (atFirstInCurrentDoc) { + atFirstInCurrentDoc = false; return littleSpans.startPosition(); } + while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) { + while (bigSpans.endPosition() < littleSpans.endPosition()) { + if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) { + oneExhaustedInCurrentDoc = true; + return NO_MORE_POSITIONS; + } + } + if (bigSpans.startPosition() <= littleSpans.startPosition()) { + return littleSpans.startPosition(); + } + } + oneExhaustedInCurrentDoc = true; + return NO_MORE_POSITIONS; } - oneExhaustedInCurrentDoc = true; - return NO_MORE_POSITIONS; - } - }; + }; + } } + } \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 05a40d1d2bc..d85eabe4866 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -17,15 +17,10 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.Map; -import java.util.Set; /** * Holds all implementations of classes in the o.a.l.s.spans package as a @@ -83,18 +78,13 @@ final class JustCompileSearchSpans { static final class JustCompileSpanQuery extends SpanQuery { - @Override - protected void extractTerms(Set terms) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - @Override public String getField() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) { + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java b/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java index 06ad4652d39..54d72726558 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java @@ -21,14 +21,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SlowCompositeReaderWrapper; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; /** * @@ -44,17 +40,14 @@ public class MultiSpansWrapper { } public static Spans wrap(IndexReader reader, SpanQuery spanQuery, SpanCollector collector) throws IOException { + + IndexSearcher searcher = new IndexSearcher(reader); + searcher.setQueryCache(null); LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing LeafReaderContext lrContext = lr.getContext(); - SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly - HashSet termSet = new HashSet<>(); - rewrittenQuery.extractTerms(termSet); - Map termContexts = new HashMap<>(); - for (Term term: termSet) { - TermContext termContext = TermContext.build(lrContext, term); - termContexts.put(term, termContext); - } - Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts, collector); - return actSpans; + + SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(spanQuery, false); + + return w.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), collector); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index c7276e27e6d..381e1d7607a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -17,9 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.util.HashSet; -import java.util.Set; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -36,7 +33,11 @@ import org.apache.lucene.util.LuceneTestCase; import org.junit.AfterClass; import org.junit.BeforeClass; -import static org.apache.lucene.search.spans.SpanTestUtil.*; +import java.util.HashSet; +import java.util.Set; + +import static org.apache.lucene.search.spans.SpanTestUtil.assertFinished; +import static org.apache.lucene.search.spans.SpanTestUtil.assertNext; public class TestFieldMaskingSpanQuery extends LuceneTestCase { @@ -141,7 +142,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { QueryUtils.checkEqual(q, qr); Set terms = new HashSet<>(); - qr.extractTerms(terms); + qr.createWeight(searcher, false).extractTerms(terms); assertEquals(1, terms.size()); } @@ -161,7 +162,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { QueryUtils.checkUnequal(q, qr); Set terms = new HashSet<>(); - qr.extractTerms(terms); + qr.createWeight(searcher, false).extractTerms(terms); assertEquals(2, terms.size()); } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 4a9f11656bd..c16f969e43d 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -29,7 +29,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; import org.apache.lucene.index.Terms; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.queries.CommonTermsQuery; @@ -68,7 +67,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeSet; /** @@ -301,15 +299,9 @@ public class WeightedSpanTermExtractor { q = spanQuery; } LeafReaderContext context = getLeafContext(); - Map termContexts = new HashMap<>(); - TreeSet extractedTerms = new TreeSet<>(); SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(q, false); - w.extractTerms(extractedTerms); - for (Term term : extractedTerms) { - termContexts.put(term, TermContext.build(context, term)); - } Bits acceptDocs = context.reader().getLiveDocs(); - final Spans spans = q.getSpans(context, acceptDocs, termContexts, w.getSpanCollectorFactory().newCollector()); + final Spans spans = w.getSpans(context, acceptDocs); if (spans == null) { return; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java index 7a2697cb4cf..2a30a174299 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java @@ -18,16 +18,10 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import 
org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.Map; -import java.util.Set; /** Wraps a span query with asserts */ public class AssertingSpanQuery extends SpanQuery { @@ -37,21 +31,6 @@ public class AssertingSpanQuery extends SpanQuery { this.in = in; } - @Override - protected void extractTerms(Set terms) { - in.extractTerms(terms); - } - - @Override - public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { - Spans spans = in.getSpans(context, acceptDocs, termContexts, collector); - if (spans == null) { - return null; - } else { - return new AssertingSpans(spans); - } - } - @Override public String getField() { return in.getField(); @@ -63,15 +42,9 @@ public class AssertingSpanQuery extends SpanQuery { } @Override - public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - // TODO: we are wasteful and createWeight twice in this case... use VirtualMethod? - // we need to not wrap if the query is e.g. a Payload one that overrides this (it should really be final) - SpanWeight weight = in.createWeight(searcher, needsScores); - if (weight.getClass() == SpanWeight.class) { - return super.createWeight(searcher, needsScores); - } else { - return weight; - } + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException { + SpanWeight weight = in.createWeight(searcher, needsScores, factory); + return new AssertingSpanWeight(weight); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java new file mode 100644 index 00000000000..d685832e583 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java @@ -0,0 +1,63 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * Wraps a SpanWeight with additional asserts + */ +public class AssertingSpanWeight extends SpanWeight { + + final SpanWeight in; + + /** + * Create an AssertingSpanWeight + * @param in the SpanWeight to wrap + * @throws IOException on error + */ + public AssertingSpanWeight(SpanWeight in) throws IOException { + super((SpanQuery) in.getQuery(), in.similarity, in.collectorFactory); + this.in = in; + } + + @Override + public void extractTermContexts(Map contexts) { + in.extractTermContexts(contexts); + } + + @Override + public Spans getSpans(LeafReaderContext context, Bits liveDocs, SpanCollector collector) throws IOException { + Spans spans = in.getSpans(context, liveDocs, collector); + if (spans == null) + return null; + return new AssertingSpans(spans); + } + + @Override + public void extractTerms(Set terms) { + in.extractTerms(terms); + } +}
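
The sketch below is illustrative only and not part of the patch: it shows how callers obtain span matches after this change, going through SpanWeight rather than calling SpanQuery.getSpans() with a hand-built term-context map (compare the MultiSpansWrapper and WeightedSpanTermExtractor hunks above). The reader and query arguments, and the SpanWeightUsageSketch class itself, are assumed for illustration.

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;

import java.io.IOException;

class SpanWeightUsageSketch {                        // hypothetical helper, not in the patch

  static void visitSpans(IndexReader reader, SpanQuery query) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);                    // as in MultiSpansWrapper: keep the SpanWeight unwrapped
    // The weight now carries the TermContexts, so no Map<Term,TermContext> is built by the caller.
    SpanWeight weight = (SpanWeight) searcher.createNormalizedWeight(query, false);
    for (LeafReaderContext ctx : reader.leaves()) {
      Bits acceptDocs = ctx.reader().getLiveDocs();
      // The two-argument form uses the weight's SpanCollectorFactory to create a collector.
      Spans spans = weight.getSpans(ctx, acceptDocs);
      if (spans == null) {
        continue;                                    // query has no matches in this segment
      }
      while (spans.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          // consume spans.startPosition() / spans.endPosition() here
        }
      }
    }
  }
}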
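
For query writers, the following is a schematic restatement (again not part of the patch) of the createWeight() pattern the change applies to compound span queries such as SpanNearQuery, SpanWithinQuery and PayloadNearQuery: build sub-weights with needsScores=false, pool their term statistics through SpanSimilarity.build(), and hand both to the concrete SpanWeight. MyCompoundSpanQuery and newWeight() are placeholder names, not API introduced by this change.

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanCollectorFactory;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanSimilarity;
import org.apache.lucene.search.spans.SpanWeight;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

abstract class MyCompoundSpanQuery extends SpanQuery {

  protected final List<SpanQuery> clauses = new ArrayList<>();

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores,
                                 SpanCollectorFactory factory) throws IOException {
    // Sub-weights are created without scores; scoring statistics are aggregated once below.
    List<SpanWeight> subWeights = new ArrayList<>();
    for (SpanQuery clause : clauses) {
      subWeights.add(clause.createWeight(searcher, false, factory));
    }
    // Yields a NonScoringSimilarity when needsScores is false, or null when the
    // sub-weights contribute no term statistics.
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights);
    return newWeight(subWeights, similarity, factory);
  }

  /** Placeholder hook: a concrete query would build its SpanNearWeight-style weight here. */
  protected abstract SpanWeight newWeight(List<SpanWeight> subWeights, SpanSimilarity similarity,
                                          SpanCollectorFactory factory) throws IOException;
}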