mirror of https://github.com/apache/lucene.git
LUCENE-6466: Move SpanQuery.getSpans() and .extractTerms() to SpanWeight
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680565 13f79535-47bb-0310-9956-ffa450edef68
commit 2183e67cfd
parent 463d453abf
@@ -204,6 +204,9 @@ API Changes
* LUCENE-6484: Removed EliasFanoDocIdSet, which was unused.
  (Paul Elschot via Adrien Grand)

* LUCENE-6466: Moved SpanQuery.getSpans() and .extractTerms() to SpanWeight
  (Alan Woodward)

Other

* LUCENE-6413: Test runner should report the number of suites completed/
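From the caller's side the change looks like this (a minimal sketch; it assumes an IndexSearcher, a LeafReaderContext, live-doc Bits and a SpanCollector are already in hand, and mirrors the PayloadSpanUtil hunk further down):

    // Before this commit: Spans came straight from the query, and the
    // caller had to build the Map<Term,TermContext> itself.
    Spans spans = spanQuery.getSpans(leafContext, acceptDocs, termContexts, collector);

    // After this commit: build a SpanWeight first; the weight owns the
    // term contexts, and span extraction happens on the weight.
    SpanWeight weight = spanQuery.createWeight(searcher, false, SpanCollectorFactory.NO_OP_FACTORY);
    Spans spans = weight.getSpans(leafContext, acceptDocs, collector);
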
@@ -24,9 +24,11 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanCollectorFactory;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanSimilarity;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;

@@ -34,8 +36,10 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;

/**

@@ -69,8 +73,13 @@ public class PayloadNearQuery extends SpanNearQuery {
  }

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new PayloadNearSpanWeight(this, searcher);
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    List<SpanWeight> subWeights = new ArrayList<>();
    for (SpanQuery q : clauses) {
      subWeights.add(q.createWeight(searcher, false, PayloadSpanCollector.FACTORY));
    }
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights);
    return new PayloadNearSpanWeight(subWeights, similarity);
  }

  @Override

@@ -127,20 +136,20 @@ public class PayloadNearQuery extends SpanNearQuery {
        && function.equals(other.function);
  }

  public class PayloadNearSpanWeight extends SpanWeight {
  public class PayloadNearSpanWeight extends SpanNearWeight {

    public PayloadNearSpanWeight(SpanQuery query, IndexSearcher searcher)
    public PayloadNearSpanWeight(List<SpanWeight> subWeights, SpanSimilarity similarity)
        throws IOException {
      super(query, searcher, PayloadSpanCollector.FACTORY);
      super(subWeights, similarity, PayloadSpanCollector.FACTORY);
    }

    @Override
    public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
      PayloadSpanCollector collector = PayloadSpanCollector.FACTORY.newCollector();
      Spans spans = query.getSpans(context, acceptDocs, termContexts, collector);
      PayloadSpanCollector collector = (PayloadSpanCollector) collectorFactory.newCollector();
      Spans spans = super.getSpans(context, acceptDocs, collector);
      return (spans == null)
              ? null
              : new PayloadNearSpanScorer(spans, this, collector, similarity.simScorer(stats, context));
              : new PayloadNearSpanScorer(spans, this, collector, similarity.simScorer(context));
    }

    @Override

@@ -151,7 +160,7 @@ public class PayloadNearQuery extends SpanNearQuery {
      if (newDoc == doc) {
        float freq = scorer.freq();
        Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
        SimScorer docScorer = similarity.simScorer(stats, context);
        SimScorer docScorer = similarity.simScorer(context);
        Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
        Explanation expl = Explanation.match(
            scoreExplanation.getValue(),
@@ -34,7 +34,7 @@ import java.util.Collection;
 */
public class PayloadSpanCollector implements SpanCollector {

  public static final SpanCollectorFactory<PayloadSpanCollector> FACTORY = new SpanCollectorFactory<PayloadSpanCollector>() {
  public static final SpanCollectorFactory FACTORY = new SpanCollectorFactory() {
    @Override
    public PayloadSpanCollector newCollector() {
      return new PayloadSpanCollector();
@@ -21,7 +21,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;

@@ -35,16 +34,14 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;

/**
 * Experimental class to get set of payloads for most standard Lucene queries.

@@ -179,18 +176,15 @@ public class PayloadSpanUtil {

  private void getPayloads(Collection<byte []> payloads, SpanQuery query)
      throws IOException {
    Map<Term,TermContext> termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();

    final IndexSearcher searcher = new IndexSearcher(context);
    searcher.setQueryCache(null);
    searcher.createNormalizedWeight(query, false).extractTerms(terms);
    for (Term term : terms) {
      termContexts.put(term, TermContext.build(context, term));
    }

    SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(query, false);

    PayloadSpanCollector collector = new PayloadSpanCollector();
    for (LeafReaderContext leafReaderContext : context.leaves()) {
      final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts, collector);
      final Spans spans = w.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), collector);
      if (spans != null) {
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
          while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
@@ -20,6 +20,7 @@ package org.apache.lucene.search.payloads;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.DefaultSimilarity;

@@ -27,13 +28,12 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.BufferedSpanCollector;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanCollectorFactory;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanSimilarity;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

@@ -70,7 +70,9 @@ public class PayloadTermQuery extends SpanTermQuery {

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new PayloadTermWeight(this, searcher);
    TermContext context = TermContext.build(searcher.getTopReaderContext(), term);
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, searcher.termStatistics(term, context));
    return new PayloadTermWeight(context, similarity);
  }

  private static class PayloadTermCollector implements SpanCollector {

@@ -103,20 +105,20 @@ public class PayloadTermQuery extends SpanTermQuery {
    }
  }

  private class PayloadTermWeight extends SpanWeight {
  private class PayloadTermWeight extends SpanTermWeight {

    public PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher)
    public PayloadTermWeight(TermContext context, SpanSimilarity similarity)
        throws IOException {
      super(query, searcher, SpanCollectorFactory.NO_OP_FACTORY);
      super(context, similarity, PayloadSpanCollector.FACTORY);
    }

    @Override
    public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
      PayloadTermCollector collector = new PayloadTermCollector();
      TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts, collector);
      Spans spans = super.getSpans(context, acceptDocs, collector);
      return (spans == null)
              ? null
              : new PayloadTermSpanScorer(spans, this, collector, similarity.simScorer(stats, context));
              : new PayloadTermSpanScorer(spans, this, collector, similarity.simScorer(context));
    }

    protected class PayloadTermSpanScorer extends SpanScorer {

@@ -125,7 +127,7 @@ public class PayloadTermQuery extends SpanTermQuery {
    protected int payloadsSeen;
    private final PayloadTermCollector payloadCollector;

    public PayloadTermSpanScorer(TermSpans spans, SpanWeight weight, PayloadTermCollector collector,
    public PayloadTermSpanScorer(Spans spans, SpanWeight weight, PayloadTermCollector collector,
        Similarity.SimScorer docScorer) throws IOException {
      super(spans, weight, docScorer);
      this.payloadCollector = collector;

@@ -206,7 +208,7 @@ public class PayloadTermQuery extends SpanTermQuery {
      if (newDoc == doc) {
        float freq = scorer.sloppyFreq();
        Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
        SimScorer docScorer = similarity.simScorer(stats, context);
        SimScorer docScorer = similarity.simScorer(context);
        Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
        Explanation expl = Explanation.match(
            scoreExplanation.getValue(),
@@ -50,7 +50,7 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new SpanWeight(this, searcher, PayloadSpanCollector.FACTORY);
    return createWeight(searcher, needsScores, PayloadSpanCollector.FACTORY);
  }

  @Override
@@ -58,7 +58,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new SpanWeight(this, searcher, PayloadSpanCollector.FACTORY);
    return super.createWeight(searcher, needsScores, PayloadSpanCollector.FACTORY);
  }

  @Override
@@ -18,18 +18,12 @@ package org.apache.lucene.search.spans;
 */

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * <p>Wrapper to allow {@link SpanQuery} objects participate in composite

@@ -94,20 +88,10 @@ public class FieldMaskingSpanQuery extends SpanQuery {

  // :NOTE: getBoost and setBoost are not proxied to the maskedQuery
  // ...this is done to be more consistent with things like SpanFirstQuery

  @Override
  public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
    return maskedQuery.getSpans(context, acceptDocs, termContexts, collector);
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    maskedQuery.extractTerms(terms);
  }

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return maskedQuery.createWeight(searcher, needsScores);
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    return maskedQuery.createWeight(searcher, needsScores, factory);
  }

  @Override
@@ -19,19 +19,18 @@ package org.apache.lucene.search.spans;
 */

/**
 * Interface defining a factory for creating new {@link SpanCollector}s
 * @param <T> the SpanCollector type
 */
public interface SpanCollectorFactory<T extends SpanCollector> {
public interface SpanCollectorFactory {

  /**
   * @return a new SpanCollector
   */
  T newCollector();
  SpanCollector newCollector();

  /**
   * Factory for creating NO_OP collectors
   */
  public static final SpanCollectorFactory<?> NO_OP_FACTORY = new SpanCollectorFactory() {
  public static final SpanCollectorFactory NO_OP_FACTORY = new SpanCollectorFactory() {
    @Override
    public SpanCollector newCollector() {
      return SpanCollector.NO_OP;
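With the type parameter gone, the interface has a single method, so a factory is just an anonymous class around a collector constructor, exactly as PayloadSpanCollector.FACTORY does above. A minimal sketch (it simply hands back the no-op collector, mirroring what NO_OP_FACTORY itself does):

    SpanCollectorFactory myFactory = new SpanCollectorFactory() {
      @Override
      public SpanCollector newCollector() {
        return SpanCollector.NO_OP;  // a real factory would return its own SpanCollector
      }
    };
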
@@ -31,6 +31,7 @@ import java.util.Objects;
import java.util.Set;

abstract class SpanContainQuery extends SpanQuery implements Cloneable {

  SpanQuery big;
  SpanQuery little;

@@ -48,26 +49,48 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable {
  @Override
  public String getField() { return big.getField(); }

  /** Extract terms from both <code>big</code> and <code>little</code>. */
  @Override
  public void extractTerms(Set<Term> terms) {
    big.extractTerms(terms);
    little.extractTerms(terms);
  }
  public abstract class SpanContainWeight extends SpanWeight {

    ArrayList<Spans> prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
      Spans bigSpans = big.getSpans(context, acceptDocs, termContexts, collector);
      if (bigSpans == null) {
        return null;
    final SpanWeight bigWeight;
    final SpanWeight littleWeight;

    public SpanContainWeight(SpanSimilarity similarity, SpanCollectorFactory factory,
                             SpanWeight bigWeight, SpanWeight littleWeight) throws IOException {
      super(SpanContainQuery.this, similarity, factory);
      this.bigWeight = bigWeight;
      this.littleWeight = littleWeight;
    }
      Spans littleSpans = little.getSpans(context, acceptDocs, termContexts, collector);
      if (littleSpans == null) {
        return null;

    /**
     * Extract terms from both <code>big</code> and <code>little</code>.
     */
    @Override
    public void extractTerms(Set<Term> terms) {
      bigWeight.extractTerms(terms);
      littleWeight.extractTerms(terms);
    }
      ArrayList<Spans> bigAndLittle = new ArrayList<>();
      bigAndLittle.add(bigSpans);
      bigAndLittle.add(littleSpans);
      return bigAndLittle;

    ArrayList<Spans> prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) throws IOException {
      Spans bigSpans = bigWeight.getSpans(context, acceptDocs, collector);
      if (bigSpans == null) {
        return null;
      }
      Spans littleSpans = littleWeight.getSpans(context, acceptDocs, collector);
      if (littleSpans == null) {
        return null;
      }
      ArrayList<Spans> bigAndLittle = new ArrayList<>();
      bigAndLittle.add(bigSpans);
      bigAndLittle.add(littleSpans);
      return bigAndLittle;
    }

    @Override
    public void extractTermContexts(Map<Term, TermContext> contexts) {
      bigWeight.extractTermContexts(contexts);
      littleWeight.extractTermContexts(contexts);
    }

  }

  String toString(String field, String name) {
@@ -18,13 +18,11 @@ package org.apache.lucene.search.spans;
 */

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;

/** Keep matches that contain another Spans. */
public class SpanContainingQuery extends SpanContainQuery {

@@ -48,63 +46,79 @@ public class SpanContainingQuery extends SpanContainQuery {
        (SpanQuery) big.clone(),
        (SpanQuery) little.clone());
  }

  /**
   * Return spans from <code>big</code> that contain at least one spans from <code>little</code>.
   * The payload is from the spans of <code>big</code>.
   */

  @Override
  public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
    ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts, collector);
    if (containerContained == null) {
      return null;
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    SpanWeight bigWeight = big.createWeight(searcher, false, factory);
    SpanWeight littleWeight = little.createWeight(searcher, false, factory);
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, bigWeight, littleWeight);
    return new SpanContainingWeight(similarity, factory, bigWeight, littleWeight);
  }

  public class SpanContainingWeight extends SpanContainWeight {

    public SpanContainingWeight(SpanSimilarity similarity, SpanCollectorFactory factory,
                                SpanWeight bigWeight, SpanWeight littleWeight) throws IOException {
      super(similarity, factory, bigWeight, littleWeight);
    }

    Spans big = containerContained.get(0);
    Spans little = containerContained.get(1);

    return new ContainSpans(big, little, big) {

      @Override
      boolean twoPhaseCurrentDocMatches() throws IOException {
        oneExhaustedInCurrentDoc = false;
        assert littleSpans.startPosition() == -1;
        while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) {
          while (littleSpans.startPosition() < bigSpans.startPosition()) {
            if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) {
              oneExhaustedInCurrentDoc = true;
              return false;
            }
          }
          if (bigSpans.endPosition() >= littleSpans.endPosition()) {
            atFirstInCurrentDoc = true;
            return true;
          }
        }
        oneExhaustedInCurrentDoc = true;
        return false;
    /**
     * Return spans from <code>big</code> that contain at least one spans from <code>little</code>.
     * The payload is from the spans of <code>big</code>.
     */
    @Override
    public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) throws IOException {
      ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, collector);
      if (containerContained == null) {
        return null;
      }

      @Override
      public int nextStartPosition() throws IOException {
        if (atFirstInCurrentDoc) {
          atFirstInCurrentDoc = false;
          return bigSpans.startPosition();
        }
        while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) {
          while (littleSpans.startPosition() < bigSpans.startPosition()) {
            if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) {
              oneExhaustedInCurrentDoc = true;
              return NO_MORE_POSITIONS;
      Spans big = containerContained.get(0);
      Spans little = containerContained.get(1);

      return new ContainSpans(big, little, big) {

        @Override
        boolean twoPhaseCurrentDocMatches() throws IOException {
          oneExhaustedInCurrentDoc = false;
          assert littleSpans.startPosition() == -1;
          while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) {
            while (littleSpans.startPosition() < bigSpans.startPosition()) {
              if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) {
                oneExhaustedInCurrentDoc = true;
                return false;
              }
            }
            if (bigSpans.endPosition() >= littleSpans.endPosition()) {
              atFirstInCurrentDoc = true;
              return true;
            }
          }
          if (bigSpans.endPosition() >= littleSpans.endPosition()) {
          oneExhaustedInCurrentDoc = true;
          return false;
        }

        @Override
        public int nextStartPosition() throws IOException {
          if (atFirstInCurrentDoc) {
            atFirstInCurrentDoc = false;
            return bigSpans.startPosition();
          }
          while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) {
            while (littleSpans.startPosition() < bigSpans.startPosition()) {
              if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) {
                oneExhaustedInCurrentDoc = true;
                return NO_MORE_POSITIONS;
              }
            }
            if (bigSpans.endPosition() >= littleSpans.endPosition()) {
              return bigSpans.startPosition();
            }
          }
          oneExhaustedInCurrentDoc = true;
          return NO_MORE_POSITIONS;
        }
          oneExhaustedInCurrentDoc = true;
          return NO_MORE_POSITIONS;
        }
      };
    };
  }
  }
}
@@ -18,20 +18,17 @@ package org.apache.lucene.search.spans;
 */

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Wraps any {@link MultiTermQuery} as a {@link SpanQuery},

@@ -75,11 +72,6 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
    }
  }

  @Override
  protected void extractTerms(Set<Term> terms) {
    throw new IllegalStateException("Rewrite first");
  }

  /**
   * Expert: returns the rewriteMethod
   */

@@ -97,17 +89,17 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
  public final void setRewriteMethod(SpanRewriteMethod rewriteMethod) {
    query.setRewriteMethod(rewriteMethod);
  }

  @Override
  public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
    throw new UnsupportedOperationException("Query should have been rewritten");
  }

  @Override
  public String getField() {
    return query.getField();
  }

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    throw new IllegalArgumentException("Rewrite first!");
  }

  /** Returns the wrapped query */
  public Query getWrappedQuery() {
    return query;
@@ -22,6 +22,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

@@ -89,13 +90,6 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
  @Override
  public String getField() { return field; }

  @Override
  public void extractTerms(Set<Term> terms) {
    for (final SpanQuery clause : clauses) {
      clause.extractTerms(terms);
    }
  }

  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();

@@ -118,27 +112,61 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
  }

  @Override
  public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    List<SpanWeight> subWeights = new ArrayList<>();
    for (SpanQuery q : clauses) {
      subWeights.add(q.createWeight(searcher, false, factory));
    }
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights);
    return new SpanNearWeight(subWeights, similarity, factory);
  }

    Terms terms = context.reader().terms(field);
    if (terms == null) {
      return null; // field does not exist
  public class SpanNearWeight extends SpanWeight {

    final List<SpanWeight> subWeights;

    public SpanNearWeight(List<SpanWeight> subWeights, SpanSimilarity similarity, SpanCollectorFactory factory) throws IOException {
      super(SpanNearQuery.this, similarity, factory);
      this.subWeights = subWeights;
    }

    ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
    SpanCollector subSpanCollector = inOrder ? collector.bufferedCollector() : collector;
    for (SpanQuery seq : clauses) {
      Spans subSpan = seq.getSpans(context, acceptDocs, termContexts, subSpanCollector);
      if (subSpan != null) {
        subSpans.add(subSpan);
      } else {
        return null; // all required
    @Override
    public void extractTermContexts(Map<Term, TermContext> contexts) {
      for (SpanWeight w : subWeights) {
        w.extractTermContexts(contexts);
      }
    }

    // all NearSpans require at least two subSpans
    return (! inOrder) ? new NearSpansUnordered(this, subSpans) : new NearSpansOrdered(this, subSpans, collector);
    @Override
    public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, SpanCollector collector) throws IOException {

      Terms terms = context.reader().terms(field);
      if (terms == null) {
        return null; // field does not exist
      }

      ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
      SpanCollector subSpanCollector = inOrder ? collector.bufferedCollector() : collector;
      for (SpanWeight w : subWeights) {
        Spans subSpan = w.getSpans(context, acceptDocs, subSpanCollector);
        if (subSpan != null) {
          subSpans.add(subSpan);
        } else {
          return null; // all required
        }
      }

      // all NearSpans require at least two subSpans
      return (!inOrder) ? new NearSpansUnordered(SpanNearQuery.this, subSpans)
                        : new NearSpansOrdered(SpanNearQuery.this, subSpans, collector);
    }

    @Override
    public void extractTerms(Set<Term> terms) {
      for (SpanWeight w : subWeights) {
        w.extractTerms(terms);
      }
    }
  }

  @Override
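Every composite query in this commit follows the pattern visible in the SpanNearQuery hunk above: createWeight() builds one sub-weight per clause with needsScores=false (the clause weights are created without scores throughout this commit), combines their statistics through SpanSimilarity.build(), and the inner weight class delegates getSpans()/extractTerms()/extractTermContexts() to the sub-weights. Condensed from the hunk above:

    public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
      List<SpanWeight> subWeights = new ArrayList<>();
      for (SpanQuery q : clauses) {
        subWeights.add(q.createWeight(searcher, false, factory));  // sub-weights never score
      }
      SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights);
      return new SpanNearWeight(subWeights, similarity, factory);
    }
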
@@ -22,6 +22,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.util.Bits;

@@ -77,9 +78,6 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
  @Override
  public String getField() { return include.getField(); }

  @Override
  public void extractTerms(Set<Term> terms) { include.extractTerms(terms); }

  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();

@@ -105,69 +103,100 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
  }

  @Override
  public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
    Spans includeSpans = include.getSpans(context, acceptDocs, termContexts, collector);
    if (includeSpans == null) {
      return null;
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    SpanWeight includeWeight = include.createWeight(searcher, false, factory);
    SpanWeight excludeWeight = exclude.createWeight(searcher, false, factory);
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, includeWeight);
    return new SpanNotWeight(similarity, factory, includeWeight, excludeWeight);
  }

  public class SpanNotWeight extends SpanWeight {

    final SpanWeight includeWeight;
    final SpanWeight excludeWeight;

    public SpanNotWeight(SpanSimilarity similarity, SpanCollectorFactory factory,
                         SpanWeight includeWeight, SpanWeight excludeWeight) throws IOException {
      super(SpanNotQuery.this, similarity, factory);
      this.includeWeight = includeWeight;
      this.excludeWeight = excludeWeight;
    }

    Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts, collector);
    if (excludeSpans == null) {
      return includeSpans;
    @Override
    public void extractTermContexts(Map<Term, TermContext> contexts) {
      includeWeight.extractTermContexts(contexts);
    }

    TwoPhaseIterator excludeTwoPhase = excludeSpans.asTwoPhaseIterator();
    DocIdSetIterator excludeApproximation = excludeTwoPhase == null ? null : excludeTwoPhase.approximation();

    return new FilterSpans(includeSpans) {
      // last document we have checked matches() against for the exclusion, and failed
      // when using approximations, so we don't call it again, and pass thru all inclusions.
      int lastApproxDoc = -1;
      boolean lastApproxResult = false;

      @Override
      protected AcceptStatus accept(Spans candidate) throws IOException {
        // TODO: this logic is ugly and sneaky, can we clean it up?
        int doc = candidate.docID();
        if (doc > excludeSpans.docID()) {
          // catch up 'exclude' to the current doc
          if (excludeTwoPhase != null) {
            if (excludeApproximation.advance(doc) == doc) {
              lastApproxDoc = doc;
              lastApproxResult = excludeTwoPhase.matches();
            }
          } else {
            excludeSpans.advance(doc);
          }
        } else if (excludeTwoPhase != null && doc == excludeSpans.docID() && doc != lastApproxDoc) {
          // excludeSpans already sitting on our candidate doc, but matches not called yet.
          lastApproxDoc = doc;
          lastApproxResult = excludeTwoPhase.matches();
        }

        if (doc != excludeSpans.docID() || (doc == lastApproxDoc && lastApproxResult == false)) {
          return AcceptStatus.YES;
        }

        if (excludeSpans.startPosition() == -1) { // init exclude start position if needed
          excludeSpans.nextStartPosition();
        }

        while (excludeSpans.endPosition() <= candidate.startPosition() - pre) {
          // exclude end position is before a possible exclusion
          if (excludeSpans.nextStartPosition() == NO_MORE_POSITIONS) {
            return AcceptStatus.YES; // no more exclude at current doc.
          }
        }

        // exclude end position far enough in current doc, check start position:
        if (candidate.endPosition() + post <= excludeSpans.startPosition()) {
          return AcceptStatus.YES;
        } else {
          return AcceptStatus.NO;
        }

    @Override
    public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) throws IOException {
      Spans includeSpans = includeWeight.getSpans(context, acceptDocs, collector);
      if (includeSpans == null) {
        return null;
      }
    };

      Spans excludeSpans = excludeWeight.getSpans(context, acceptDocs, collector);
      if (excludeSpans == null) {
        return includeSpans;
      }

      TwoPhaseIterator excludeTwoPhase = excludeSpans.asTwoPhaseIterator();
      DocIdSetIterator excludeApproximation = excludeTwoPhase == null ? null : excludeTwoPhase.approximation();

      return new FilterSpans(includeSpans) {
        // last document we have checked matches() against for the exclusion, and failed
        // when using approximations, so we don't call it again, and pass thru all inclusions.
        int lastApproxDoc = -1;
        boolean lastApproxResult = false;

        @Override
        protected AcceptStatus accept(Spans candidate) throws IOException {
          // TODO: this logic is ugly and sneaky, can we clean it up?
          int doc = candidate.docID();
          if (doc > excludeSpans.docID()) {
            // catch up 'exclude' to the current doc
            if (excludeTwoPhase != null) {
              if (excludeApproximation.advance(doc) == doc) {
                lastApproxDoc = doc;
                lastApproxResult = excludeTwoPhase.matches();
              }
            } else {
              excludeSpans.advance(doc);
            }
          } else if (excludeTwoPhase != null && doc == excludeSpans.docID() && doc != lastApproxDoc) {
            // excludeSpans already sitting on our candidate doc, but matches not called yet.
            lastApproxDoc = doc;
            lastApproxResult = excludeTwoPhase.matches();
          }

          if (doc != excludeSpans.docID() || (doc == lastApproxDoc && lastApproxResult == false)) {
            return AcceptStatus.YES;
          }

          if (excludeSpans.startPosition() == -1) { // init exclude start position if needed
            excludeSpans.nextStartPosition();
          }

          while (excludeSpans.endPosition() <= candidate.startPosition() - pre) {
            // exclude end position is before a possible exclusion
            if (excludeSpans.nextStartPosition() == NO_MORE_POSITIONS) {
              return AcceptStatus.YES; // no more exclude at current doc.
            }
          }

          // exclude end position far enough in current doc, check start position:
          if (candidate.endPosition() + post <= excludeSpans.startPosition()) {
            return AcceptStatus.YES;
          } else {
            return AcceptStatus.NO;
          }
        }
      };
    }

    @Override
    public void extractTerms(Set<Term> terms) {
      includeWeight.extractTerms(terms);
    }
  }

  @Override
@@ -24,6 +24,7 @@ import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.DisiPriorityQueue;
import org.apache.lucene.search.DisiWrapper;
import org.apache.lucene.search.DisjunctionDISIApproximation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.util.Bits;

@@ -71,13 +72,6 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
  @Override
  public String getField() { return field; }

  @Override
  public void extractTerms(Set<Term> terms) {
    for(final SpanQuery clause: clauses) {
      clause.extractTerms(terms);
    }
  }

  @Override
  public SpanOrQuery clone() {
    int sz = clauses.size();

@@ -143,190 +137,223 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
    return h;
  }

  @Override
  public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector)
      throws IOException {
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    List<SpanWeight> subWeights = new ArrayList<>(clauses.size());
    for (SpanQuery q : clauses) {
      subWeights.add(q.createWeight(searcher, false, factory));
    }
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, subWeights);
    return new SpanOrWeight(similarity, factory, subWeights);
  }

    ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
  public class SpanOrWeight extends SpanWeight {

    for (SpanQuery sq : clauses) {
      Spans spans = sq.getSpans(context, acceptDocs, termContexts, collector);
      if (spans != null) {
        subSpans.add(spans);
    final List<SpanWeight> subWeights;

    public SpanOrWeight(SpanSimilarity similarity, SpanCollectorFactory factory, List<SpanWeight> subWeights) throws IOException {
      super(SpanOrQuery.this, similarity, factory);
      this.subWeights = subWeights;
    }

    @Override
    public void extractTerms(Set<Term> terms) {
      for (final SpanWeight w: subWeights) {
        w.extractTerms(terms);
      }
    }

    if (subSpans.size() == 0) {
      return null;
    } else if (subSpans.size() == 1) {
      return subSpans.get(0);
    @Override
    public void extractTermContexts(Map<Term, TermContext> contexts) {
      for (SpanWeight w : subWeights) {
        w.extractTermContexts(contexts);
      }
    }

    DisiPriorityQueue<Spans> byDocQueue = new DisiPriorityQueue<>(subSpans.size());
    for (Spans spans : subSpans) {
      byDocQueue.add(new DisiWrapper<>(spans));
    }
    @Override
    public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector)
        throws IOException {

    SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1
      ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());

    return new Spans() {
      Spans topPositionSpans = null;

      @Override
      public int nextDoc() throws IOException {
        topPositionSpans = null;
        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
        int currentDoc = topDocSpans.doc;
        do {
          topDocSpans.doc = topDocSpans.iterator.nextDoc();
          topDocSpans = byDocQueue.updateTop();
        } while (topDocSpans.doc == currentDoc);
        return topDocSpans.doc;
      for (SpanWeight w : subWeights) {
        Spans spans = w.getSpans(context, acceptDocs, collector);
        if (spans != null) {
          subSpans.add(spans);
        }
      }

      @Override
      public int advance(int target) throws IOException {
        topPositionSpans = null;
        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
        do {
          topDocSpans.doc = topDocSpans.iterator.advance(target);
          topDocSpans = byDocQueue.updateTop();
        } while (topDocSpans.doc < target);
        return topDocSpans.doc;
      if (subSpans.size() == 0) {
        return null;
      } else if (subSpans.size() == 1) {
        return subSpans.get(0);
      }

      @Override
      public int docID() {
        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
        return topDocSpans.doc;
      DisiPriorityQueue<Spans> byDocQueue = new DisiPriorityQueue<>(subSpans.size());
      for (Spans spans : subSpans) {
        byDocQueue.add(new DisiWrapper<>(spans));
      }

      @Override
      public TwoPhaseIterator asTwoPhaseIterator() {
        boolean hasApproximation = false;
        for (DisiWrapper<Spans> w : byDocQueue) {
          if (w.twoPhaseView != null) {
            hasApproximation = true;
            break;
          }
      SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1

      return new Spans() {
        Spans topPositionSpans = null;

        @Override
        public int nextDoc() throws IOException {
          topPositionSpans = null;
          DisiWrapper<Spans> topDocSpans = byDocQueue.top();
          int currentDoc = topDocSpans.doc;
          do {
            topDocSpans.doc = topDocSpans.iterator.nextDoc();
            topDocSpans = byDocQueue.updateTop();
          } while (topDocSpans.doc == currentDoc);
          return topDocSpans.doc;
        }

        if (! hasApproximation) { // none of the sub spans supports approximations
          return null;
        @Override
        public int advance(int target) throws IOException {
          topPositionSpans = null;
          DisiWrapper<Spans> topDocSpans = byDocQueue.top();
          do {
            topDocSpans.doc = topDocSpans.iterator.advance(target);
            topDocSpans = byDocQueue.updateTop();
          } while (topDocSpans.doc < target);
          return topDocSpans.doc;
        }

        return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
          @Override
          public boolean matches() throws IOException {
            return twoPhaseCurrentDocMatches();
          }
        };
      }

      int lastDocTwoPhaseMatched = -1;

      boolean twoPhaseCurrentDocMatches() throws IOException {
        DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
        // remove the head of the list as long as it does not match
        final int currentDoc = listAtCurrentDoc.doc;
        while (listAtCurrentDoc.twoPhaseView != null) {
          if (listAtCurrentDoc.twoPhaseView.matches()) {
            // use this spans for positions at current doc:
            listAtCurrentDoc.lastApproxMatchDoc = currentDoc;
            break;
          }
          // do not use this spans for positions at current doc:
          listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc;
          listAtCurrentDoc = listAtCurrentDoc.next;
          if (listAtCurrentDoc == null) {
            return false;
          }
        @Override
        public int docID() {
          DisiWrapper<Spans> topDocSpans = byDocQueue.top();
          return topDocSpans.doc;
        }
        lastDocTwoPhaseMatched = currentDoc;
        topPositionSpans = null;
        return true;
      }

      void fillPositionQueue() throws IOException { // called at first nextStartPosition
        assert byPositionQueue.size() == 0;
        // add all matching Spans at current doc to byPositionQueue
        DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
        while (listAtCurrentDoc != null) {
          Spans spansAtDoc = listAtCurrentDoc.iterator;
          if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDisiApproximation
            if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation
              if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false
                spansAtDoc = null;
              } else {
                if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) {
                  if (! listAtCurrentDoc.twoPhaseView.matches()) {
                    spansAtDoc = null;
                  }
                }
              }
        @Override
        public TwoPhaseIterator asTwoPhaseIterator() {
          boolean hasApproximation = false;
          for (DisiWrapper<Spans> w : byDocQueue) {
            if (w.twoPhaseView != null) {
              hasApproximation = true;
              break;
            }
          }

          if (spansAtDoc != null) {
            assert spansAtDoc.docID() == listAtCurrentDoc.doc;
            assert spansAtDoc.startPosition() == -1;
            spansAtDoc.nextStartPosition();
            assert spansAtDoc.startPosition() != NO_MORE_POSITIONS;
            byPositionQueue.add(spansAtDoc);
          if (!hasApproximation) { // none of the sub spans supports approximations
            return null;
          }
          listAtCurrentDoc = listAtCurrentDoc.next;

          return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
            @Override
            public boolean matches() throws IOException {
              return twoPhaseCurrentDocMatches();
            }
          };
        }
        assert byPositionQueue.size() > 0;
      }

      @Override
      public int nextStartPosition() throws IOException {
        if (topPositionSpans == null) {
          byPositionQueue.clear();
          fillPositionQueue(); // fills byPositionQueue at first position
          topPositionSpans = byPositionQueue.top();
        } else {
          topPositionSpans.nextStartPosition();
          topPositionSpans = byPositionQueue.updateTop();
        }
        return topPositionSpans.startPosition();
      }

      @Override
      public int startPosition() {
        return topPositionSpans == null ? -1 : topPositionSpans.startPosition();
      }
        int lastDocTwoPhaseMatched = -1;

      @Override
      public int endPosition() {
        return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
      }

      @Override
      public void collect(SpanCollector collector) throws IOException {
        if (topPositionSpans != null)
          topPositionSpans.collect(collector);
      }

      @Override
      public String toString() {
        return "spanOr("+SpanOrQuery.this+")@"+docID()+": "+startPosition()+" - "+endPosition();
      }

      long cost = -1;

      @Override
      public long cost() {
        if (cost == -1) {
          cost = 0;
          for (Spans spans : subSpans) {
            cost += spans.cost();
        boolean twoPhaseCurrentDocMatches() throws IOException {
          DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
          // remove the head of the list as long as it does not match
          final int currentDoc = listAtCurrentDoc.doc;
          while (listAtCurrentDoc.twoPhaseView != null) {
            if (listAtCurrentDoc.twoPhaseView.matches()) {
              // use this spans for positions at current doc:
              listAtCurrentDoc.lastApproxMatchDoc = currentDoc;
              break;
            }
            // do not use this spans for positions at current doc:
            listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc;
            listAtCurrentDoc = listAtCurrentDoc.next;
            if (listAtCurrentDoc == null) {
              return false;
            }
          }
          lastDocTwoPhaseMatched = currentDoc;
          topPositionSpans = null;
          return true;
        }
        return cost;
      }
    };

        void fillPositionQueue() throws IOException { // called at first nextStartPosition
          assert byPositionQueue.size() == 0;
          // add all matching Spans at current doc to byPositionQueue
          DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
          while (listAtCurrentDoc != null) {
            Spans spansAtDoc = listAtCurrentDoc.iterator;
            if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDisiApproximation
              if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation
                if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false
                  spansAtDoc = null;
                } else {
                  if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) {
                    if (!listAtCurrentDoc.twoPhaseView.matches()) {
                      spansAtDoc = null;
                    }
                  }
                }
              }
            }

            if (spansAtDoc != null) {
              assert spansAtDoc.docID() == listAtCurrentDoc.doc;
              assert spansAtDoc.startPosition() == -1;
              spansAtDoc.nextStartPosition();
              assert spansAtDoc.startPosition() != NO_MORE_POSITIONS;
              byPositionQueue.add(spansAtDoc);
            }
            listAtCurrentDoc = listAtCurrentDoc.next;
          }
          assert byPositionQueue.size() > 0;
        }

        @Override
        public int nextStartPosition() throws IOException {
          if (topPositionSpans == null) {
            byPositionQueue.clear();
            fillPositionQueue(); // fills byPositionQueue at first position
            topPositionSpans = byPositionQueue.top();
          } else {
            topPositionSpans.nextStartPosition();
            topPositionSpans = byPositionQueue.updateTop();
          }
          return topPositionSpans.startPosition();
        }

        @Override
        public int startPosition() {
          return topPositionSpans == null ? -1 : topPositionSpans.startPosition();
        }

        @Override
        public int endPosition() {
          return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
        }

        @Override
        public void collect(SpanCollector collector) throws IOException {
          if (topPositionSpans != null)
            topPositionSpans.collect(collector);
        }

        @Override
        public String toString() {
          return "spanOr(" + SpanOrQuery.this + ")@" + docID() + ": " + startPosition() + " - " + endPosition();
        }

        long cost = -1;

        @Override
        public long cost() {
          if (cost == -1) {
            cost = 0;
            for (Spans spans : subSpans) {
              cost += spans.cost();
            }
          }
          return cost;
        }
      };
    }
  }

}
@@ -21,6 +21,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
import org.apache.lucene.util.Bits;

@@ -47,18 +48,9 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
   * */
  public SpanQuery getMatch() { return match; }

  @Override
  public String getField() { return match.getField(); }

  @Override
  public void extractTerms(Set<Term> terms) {
    match.extractTerms(terms);
  }

  /**
   * Implementing classes are required to return whether the current position is a match for the passed in
   * "match" {@link SpanQuery}.

@@ -66,7 +58,6 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
   * This is only called if the underlying last {@link Spans#nextStartPosition()} for the
   * match indicated a valid start position.
   *
   *
   * @param spans The {@link Spans} instance, positioned at the spot to check
   * @param collector the {@link SpanCollector} associated with the Spans
   *

@@ -78,14 +69,47 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
  protected abstract AcceptStatus acceptPosition(Spans spans, SpanCollector collector) throws IOException;

  @Override
  public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
    Spans matchSpans = match.getSpans(context, acceptDocs, termContexts, collector);
    return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
      @Override
      protected AcceptStatus accept(Spans candidate) throws IOException {
        return acceptPosition(candidate, collector);
      }
    };
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    SpanWeight matchWeight = match.createWeight(searcher, false, factory);
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, matchWeight);
    return new SpanPositionCheckWeight(matchWeight, similarity, factory);
  }

  public class SpanPositionCheckWeight extends SpanWeight {

    final SpanWeight matchWeight;

    public SpanPositionCheckWeight(SpanWeight matchWeight, SpanSimilarity similarity,
                                   SpanCollectorFactory collectorFactory) throws IOException {
      super(SpanPositionCheckQuery.this, similarity, collectorFactory);
      this.matchWeight = matchWeight;
    }

    public SpanPositionCheckWeight(SpanWeight matchWeight, SpanSimilarity similarity) throws IOException {
      this(matchWeight, similarity, SpanCollectorFactory.NO_OP_FACTORY);
    }

    @Override
    public void extractTerms(Set<Term> terms) {
      matchWeight.extractTerms(terms);
    }

    @Override
    public void extractTermContexts(Map<Term, TermContext> contexts) {
      matchWeight.extractTermContexts(contexts);
    }

    @Override
    public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, SpanCollector collector) throws IOException {
      Spans matchSpans = matchWeight.getSpans(context, acceptDocs, collector);
      return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
        @Override
        protected AcceptStatus accept(Spans candidate) throws IOException {
          return acceptPosition(candidate, collector);
        }
      };
    }

  }

  @Override
@@ -17,47 +17,33 @@ package org.apache.lucene.search.spans;
 * limitations under the License.
 */

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

/** Base class for span-based queries. */
public abstract class SpanQuery extends Query {

  /** Expert: Returns the matches for this query in an index.
   * Used internally to search for spans.
   * This may return null to indicate that the SpanQuery has no results.
   */
  public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException;

  /**
   * Extract terms from these spans.
   * @lucene.internal
   * @see Weight#extractTerms
   */
  protected abstract void extractTerms(Set<Term> terms);

  /**
   * Returns the name of the field matched by this query.
   * <p>
   * Note that this may return null if the query matches no terms.
   */
  public abstract String getField();

  /**
   * Create a SpanWeight for this query
   * @param searcher the IndexSearcher to be searched across
   * @param needsScores if the query needs scores
   * @param collectorFactory a SpanCollectorFactory to use in collecting postings data
   * @return a SpanWeight
   * @throws IOException on error
   */
  public abstract SpanWeight createWeight(IndexSearcher searcher, boolean needsScores,
                                          SpanCollectorFactory collectorFactory) throws IOException;

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new SpanWeight(this, searcher, getSpanCollectorFactory());
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return createWeight(searcher, needsScores, SpanCollectorFactory.NO_OP_FACTORY);
  }

  protected SpanCollectorFactory<? extends SpanCollector> getSpanCollectorFactory() {
    return SpanCollectorFactory.NO_OP_FACTORY;
  }

}
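The net effect of the hunk above: SpanQuery keeps the two-argument createWeight() only as a forwarder to the new three-argument variant with NO_OP_FACTORY, and subclasses now do all their work in the weight. A minimal sketch of a delegating subclass under the new contract (MySpanQuery and its wrapped query are hypothetical; only the createWeight wiring mirrors this patch; imports as in the file above):

    public class MySpanQuery extends SpanQuery {

      private final SpanQuery inner;  // hypothetical wrapped query

      public MySpanQuery(SpanQuery inner) { this.inner = inner; }

      @Override
      public String getField() { return inner.getField(); }

      @Override
      public String toString(String field) { return "my(" + inner.toString(field) + ")"; }

      @Override
      public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores,
                                     SpanCollectorFactory factory) throws IOException {
        // Sub-weights are always created without scores; statistics are gathered once, here.
        final SpanWeight innerWeight = inner.createWeight(searcher, false, factory);
        SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, innerWeight);
        return new SpanWeight(this, similarity, factory) {
          @Override
          public void extractTerms(Set<Term> terms) { innerWeight.extractTerms(terms); }
          @Override
          public void extractTermContexts(Map<Term, TermContext> contexts) { innerWeight.extractTermContexts(contexts); }
          @Override
          public Spans getSpans(LeafReaderContext ctx, Bits acceptDocs, SpanCollector collector) throws IOException {
            return innerWeight.getSpans(ctx, acceptDocs, collector);  // delegate unchanged
          }
        };
      }
    }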
@@ -17,13 +17,13 @@ package org.apache.lucene.search.spans;
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Objects;

import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;

import java.io.IOException;
import java.util.Objects;

/**
 * Public for extension only.
 */

@@ -42,7 +42,7 @@ public class SpanScorer extends Scorer {

  protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
    super(weight);
    this.docScorer = Objects.requireNonNull(docScorer);
    this.docScorer = docScorer;
    this.spans = Objects.requireNonNull(spans);
  }

@@ -91,6 +91,10 @@ public class SpanScorer extends Scorer {
    // assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos;
    assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos;
    numMatches++;
    if (docScorer == null) {  // scores not required, break out here
      freq = 1;
      return;
    }
    int matchLength = endPos - startPos;
    freq += docScorer.computeSlopFactor(matchLength);
    prevStartPos = startPos;
@@ -0,0 +1,202 @@
package org.apache.lucene.search.spans;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Encapsulates similarity statistics required for SpanScorers
 */
public abstract class SpanSimilarity {

  /**
   * The field term statistics are taken from
   */
  protected final String field;

  /**
   * Create a new SpanSimilarity
   * @param field the similarity field for term statistics
   */
  protected SpanSimilarity(String field) {
    this.field = field;
  }

  /**
   * Create a SimScorer for this SpanSimilarity's statistics
   * @param context the LeafReaderContext to calculate the scorer for
   * @return a SimScorer, or null if no scoring is required
   * @throws IOException on error
   */
  public abstract Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException;

  /**
   * @return the field for term statistics
   */
  public String getField() {
    return field;
  }

  /**
   * See {@link org.apache.lucene.search.Weight#getValueForNormalization()}
   *
   * @return the value for normalization
   * @throws IOException on error
   */
  public abstract float getValueForNormalization() throws IOException;

  /**
   * See {@link org.apache.lucene.search.Weight#normalize(float,float)}
   *
   * @param queryNorm the query norm
   * @param topLevelBoost the top level boost
   */
  public abstract void normalize(float queryNorm, float topLevelBoost);

  /**
   * A SpanSimilarity class that calculates similarity statistics based on the term statistics
   * of a set of terms.
   */
  public static class ScoringSimilarity extends SpanSimilarity {

    private final Similarity similarity;
    private final Similarity.SimWeight stats;

    private ScoringSimilarity(SpanQuery query, IndexSearcher searcher, TermStatistics... termStats) throws IOException {
      super(query.getField());
      this.similarity = searcher.getSimilarity();
      this.stats = similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(field), termStats);
    }

    @Override
    public Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException {
      return similarity.simScorer(stats, context);
    }

    @Override
    public String getField() {
      return field;
    }

    @Override
    public float getValueForNormalization() throws IOException {
      return stats.getValueForNormalization();
    }

    @Override
    public void normalize(float queryNorm, float topLevelBoost) {
      stats.normalize(queryNorm, topLevelBoost);
    }

  }

  /**
   * A SpanSimilarity class that does no scoring
   */
  public static class NonScoringSimilarity extends SpanSimilarity {

    private NonScoringSimilarity(String field) {
      super(field);
    }

    @Override
    public Similarity.SimScorer simScorer(LeafReaderContext context) throws IOException {
      return null;
    }

    @Override
    public float getValueForNormalization() throws IOException {
      return 0;
    }

    @Override
    public void normalize(float queryNorm, float topLevelBoost) {

    }
  }

  /**
   * Build a SpanSimilarity
   * @param query the SpanQuery to be run
   * @param searcher the searcher
   * @param needsScores whether or not scores are required
   * @param stats an array of TermStatistics to use in creating the similarity
   * @return a SpanSimilarity, or null if there are no statistics to use
   * @throws IOException on error
   */
  public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher,
                                     boolean needsScores, TermStatistics... stats) throws IOException {
    return needsScores ? new ScoringSimilarity(query, searcher, stats) : new NonScoringSimilarity(query.getField());
  }

  /**
   * Build a SpanSimilarity
   * @param query the SpanQuery to be run
   * @param searcher the searcher
   * @param needsScores whether or not scores are required
   * @param weights a set of {@link org.apache.lucene.search.spans.SpanWeight}s to extract terms from
   * @return a SpanSimilarity, or null if there are no statistics to use
   * @throws IOException on error
   */
  public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, boolean needsScores, List<SpanWeight> weights) throws IOException {
    return build(query, searcher, needsScores, weights.toArray(new SpanWeight[weights.size()]));
  }

  /**
   * Build a SpanSimilarity
   * @param query the SpanQuery to run
   * @param searcher the searcher
   * @param needsScores whether or not scores are required
   * @param weights an array of {@link org.apache.lucene.search.spans.SpanWeight}s to extract terms from
   * @return a SpanSimilarity, or null if there are no statistics to use
   * @throws IOException on error
   */
  public static SpanSimilarity build(SpanQuery query, IndexSearcher searcher, boolean needsScores, SpanWeight... weights) throws IOException {

    if (!needsScores)
      return new NonScoringSimilarity(query.getField());

    Map<Term, TermContext> contexts = new HashMap<>();
    for (SpanWeight w : weights) {
      w.extractTermContexts(contexts);
    }

    if (contexts.size() == 0)
      return null;

    TermStatistics[] stats = new TermStatistics[contexts.size()];
    int i = 0;
    for (Term term : contexts.keySet()) {
      stats[i] = searcher.termStatistics(term, contexts.get(term));
      i++;
    }

    return new ScoringSimilarity(query, searcher, stats);
  }

}
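At the call site, the two flavours are distinguished by a null SimScorer, the same convention the SpanScorer change above relies on. A hedged sketch (the helper class and its name are invented; only the build()/simScorer() semantics come from the class above):

    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.similarities.Similarity;

    import java.io.IOException;

    // Hypothetical helper showing the null conventions of SpanSimilarity.
    class SimScorerHelper {
      static Similarity.SimScorer scorerFor(SpanQuery query, IndexSearcher searcher,
                                            boolean needsScores, LeafReaderContext ctx,
                                            SpanWeight... subWeights) throws IOException {
        SpanSimilarity sim = SpanSimilarity.build(query, searcher, needsScores, subWeights);
        if (sim == null) {
          return null;  // scoring requested, but no terms to gather statistics from
        }
        return sim.simScorer(ctx);  // null for NonScoringSimilarity: scores not needed
      }
    }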
@@ -24,6 +24,7 @@ import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

@@ -50,8 +51,51 @@ public class SpanTermQuery extends SpanQuery {
  public String getField() { return term.field(); }

  @Override
  public void extractTerms(Set<Term> terms) {
    terms.add(term);
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    TermContext context = TermContext.build(searcher.getTopReaderContext(), term);
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, searcher.termStatistics(term, context));
    return new SpanTermWeight(context, similarity, factory);
  }

  public class SpanTermWeight extends SpanWeight {

    final TermContext termContext;

    public SpanTermWeight(TermContext termContext, SpanSimilarity similarity, SpanCollectorFactory factory) throws IOException {
      super(SpanTermQuery.this, similarity, factory);
      this.termContext = termContext;
    }

    @Override
    public void extractTerms(Set<Term> terms) {
      terms.add(term);
    }

    @Override
    public void extractTermContexts(Map<Term, TermContext> contexts) {
      contexts.put(term, termContext);
    }

    @Override
    public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, SpanCollector collector) throws IOException {

      final TermState state = termContext.get(context.ord);
      if (state == null) { // term is not present in that reader
        return null;
      }

      final Terms terms = context.reader().terms(term.field());
      if (terms == null)
        return null;
      if (terms.hasPositions() == false)
        throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");

      final TermsEnum termsEnum = terms.iterator();
      termsEnum.seekExact(term.bytes(), state);

      final PostingsEnum postings = termsEnum.postings(acceptDocs, null, collector.requiredPostings());
      return new TermSpans(postings, term);
    }
  }

  @Override
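Driving the weight-centric API end to end, as a hedged sketch (the reader, field and term are made up; the cast-and-getSpans pattern is the same one MultiSpansWrapper uses further down in this patch):

    IndexSearcher searcher = new IndexSearcher(reader);
    SpanTermQuery query = new SpanTermQuery(new Term("body", "lucene"));
    SpanWeight weight = (SpanWeight) searcher.createNormalizedWeight(query, false);
    for (LeafReaderContext ctx : reader.leaves()) {
      // The two-argument getSpans uses the weight's collector factory (NO_OP here).
      Spans spans = weight.getSpans(ctx, new Bits.MatchAllBits(ctx.reader().maxDoc()));
      if (spans == null) continue;  // term absent from this segment
      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          System.out.println(spans.docID() + ": [" + spans.startPosition() + ", " + spans.endPosition() + ")");
        }
      }
    }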
@@ -82,40 +126,4 @@ public class SpanTermQuery extends SpanQuery {
    return term.equals(other.term);
  }

  @Override
  public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
    TermContext termContext = termContexts.get(term);
    final TermState state;
    if (termContext == null) {
      // this happens with span-not query, as it doesn't include the NOT side in extractTerms()
      // so we seek to the term now in this segment..., this sucks because it's ugly mostly!
      final Terms terms = context.reader().terms(term.field());
      if (terms != null) {
        if (terms.hasPositions() == false) {
          throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
        }

        final TermsEnum termsEnum = terms.iterator();
        if (termsEnum.seekExact(term.bytes())) {
          state = termsEnum.termState();
        } else {
          state = null;
        }
      } else {
        state = null;
      }
    } else {
      state = termContext.get(context.ord);
    }

    if (state == null) { // term is not present in that reader
      return null;
    }

    final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
    termsEnum.seekExact(term.bytes(), state);

    final PostingsEnum postings = termsEnum.postings(acceptDocs, null, collector.requiredPostings());
    return new TermSpans(postings, term);
  }
}
@@ -17,97 +17,91 @@ package org.apache.lucene.search.spans;
 * limitations under the License.
 */

import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Expert-only. Public for use by other weight implementations
 */
public class SpanWeight extends Weight {
  protected final Similarity similarity;
  protected final Map<Term,TermContext> termContexts;
  protected final SpanQuery query;
  protected final SpanCollectorFactory<?> collectorFactory;
  protected Similarity.SimWeight stats;
public abstract class SpanWeight extends Weight {

  public SpanWeight(SpanQuery query, IndexSearcher searcher, SpanCollectorFactory<?> collectorFactory) throws IOException {
  protected final SpanSimilarity similarity;
  protected final SpanCollectorFactory collectorFactory;

  /**
   * Create a new SpanWeight
   * @param query the parent query
   * @param similarity a SpanSimilarity to be used for scoring
   * @param collectorFactory a SpanCollectorFactory to be used for Span collection
   * @throws IOException on error
   */
  public SpanWeight(SpanQuery query, SpanSimilarity similarity, SpanCollectorFactory collectorFactory) throws IOException {
    super(query);
    this.similarity = searcher.getSimilarity();
    this.query = query;
    this.similarity = similarity;
    this.collectorFactory = collectorFactory;

    termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();
    query.extractTerms(terms);
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermStatistics termStats[] = new TermStatistics[terms.size()];
    int i = 0;
    for (Term term : terms) {
      TermContext state = TermContext.build(context, term);
      termStats[i] = searcher.termStatistics(term, state);
      termContexts.put(term, state);
      i++;
    }
    final String field = query.getField();
    if (field != null) {
      stats = similarity.computeWeight(query.getBoost(),
                                       searcher.collectionStatistics(query.getField()),
                                       termStats);
    }
  }

  /**
   * @return the SpanCollectorFactory associated with this SpanWeight
   * Collect all TermContexts used by this Weight
   * @param contexts a map to add the TermContexts to
   */
  public SpanCollectorFactory<?> getSpanCollectorFactory() {
    return collectorFactory;
  }
  public abstract void extractTermContexts(Map<Term, TermContext> contexts);

  @Override
  public void extractTerms(Set<Term> terms) {
    query.extractTerms(terms);
  /**
   * Expert: Return a Spans object iterating over matches from this Weight
   * @param ctx a LeafReaderContext for this Spans
   * @param acceptDocs a bitset of documents to check
   * @param collector a SpanCollector to use for postings data collection
   * @return a Spans
   * @throws IOException on error
   */
  public abstract Spans getSpans(LeafReaderContext ctx, Bits acceptDocs, SpanCollector collector) throws IOException;

  /**
   * Expert: Return a Spans object iterating over matches from this Weight, without
   * collecting any postings data.
   * @param ctx a LeafReaderContext for this Spans
   * @param acceptDocs a bitset of documents to check
   * @return a Spans
   * @throws IOException on error
   */
  public final Spans getSpans(LeafReaderContext ctx, Bits acceptDocs) throws IOException {
    return getSpans(ctx, acceptDocs, collectorFactory.newCollector());
  }

  @Override
  public float getValueForNormalization() throws IOException {
    return stats == null ? 1.0f : stats.getValueForNormalization();
    return similarity == null ? 1.0f : similarity.getValueForNormalization();
  }

  @Override
  public void normalize(float queryNorm, float topLevelBoost) {
    if (stats != null) {
      stats.normalize(queryNorm, topLevelBoost);
    if (similarity != null) {
      similarity.normalize(queryNorm, topLevelBoost);
    }
  }

  @Override
  public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
    if (stats == null) {
    if (similarity == null) {
      return null;
    }
    Terms terms = context.reader().terms(query.getField());
    Terms terms = context.reader().terms(similarity.getField());
    if (terms != null && terms.hasPositions() == false) {
      throw new IllegalStateException("field \"" + query.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + query + ")");
      throw new IllegalStateException("field \"" + similarity.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
    }
    Spans spans = query.getSpans(context, acceptDocs, termContexts, collectorFactory.newCollector());
    return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context));
    Spans spans = getSpans(context, acceptDocs, collectorFactory.newCollector());
    return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(context));
  }

  @Override

@@ -117,7 +111,7 @@ public class SpanWeight extends Weight {
    int newDoc = scorer.advance(doc);
    if (newDoc == doc) {
      float freq = scorer.sloppyFreq();
      SimScorer docScorer = similarity.simScorer(stats, context);
      SimScorer docScorer = similarity.simScorer(context);
      Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
      Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
      return Explanation.match(scoreExplanation.getValue(),
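The normalization methods above now simply forward to the SpanSimilarity. The dance that IndexSearcher.createNormalizedWeight performs over them looks roughly like this sketch (not the searcher's actual code; the variable names are invented):

    SpanWeight w = query.createWeight(searcher, true, SpanCollectorFactory.NO_OP_FACTORY);
    float v = w.getValueForNormalization();             // delegated to the SpanSimilarity
    float norm = searcher.getSimilarity().queryNorm(v); // searcher-level query norm
    w.normalize(norm, 1.0f);                            // top-level boost of 1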
@@ -18,16 +18,15 @@ package org.apache.lucene.search.spans;
 */

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;

/** Keep matches that are contained within another Spans. */
public class SpanWithinQuery extends SpanContainQuery {

  /** Construct a SpanWithinQuery matching spans from <code>little</code>
   * that are inside of <code>big</code>.
   * This query has the boost of <code>little</code>.

@@ -49,62 +48,79 @@ public class SpanWithinQuery extends SpanContainQuery {
        (SpanQuery) little.clone());
  }

  /**
   * Return spans from <code>little</code> that are contained in a spans from <code>big</code>.
   * The payload is from the spans of <code>little</code>.
   */
  @Override
  public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
    ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts, collector);
    if (containerContained == null) {
      return null;
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    SpanWeight bigWeight = big.createWeight(searcher, false, factory);
    SpanWeight littleWeight = little.createWeight(searcher, false, factory);
    SpanSimilarity similarity = SpanSimilarity.build(this, searcher, needsScores, bigWeight, littleWeight);
    return new SpanWithinWeight(similarity, factory, bigWeight, littleWeight);
  }

  public class SpanWithinWeight extends SpanContainWeight {

    public SpanWithinWeight(SpanSimilarity similarity, SpanCollectorFactory factory,
                            SpanWeight bigWeight, SpanWeight littleWeight) throws IOException {
      super(similarity, factory, bigWeight, littleWeight);
    }

    Spans big = containerContained.get(0);
    Spans little = containerContained.get(1);

    return new ContainSpans(big, little, little) {

      @Override
      boolean twoPhaseCurrentDocMatches() throws IOException {
        oneExhaustedInCurrentDoc = false;
        assert littleSpans.startPosition() == -1;
        while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) {
          while (bigSpans.endPosition() < littleSpans.endPosition()) {
            if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) {
              oneExhaustedInCurrentDoc = true;
              return false;
            }
          }
          if (bigSpans.startPosition() <= littleSpans.startPosition()) {
            atFirstInCurrentDoc = true;
            return true;
          }
        }
        oneExhaustedInCurrentDoc = true;
        return false;
    /**
     * Return spans from <code>little</code> that are contained in a spans from <code>big</code>.
     * The payload is from the spans of <code>little</code>.
     */
    @Override
    public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, SpanCollector collector) throws IOException {
      ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, collector);
      if (containerContained == null) {
        return null;
      }

      @Override
      public int nextStartPosition() throws IOException {
        if (atFirstInCurrentDoc) {
          atFirstInCurrentDoc = false;
          return littleSpans.startPosition();
        }
        while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) {
          while (bigSpans.endPosition() < littleSpans.endPosition()) {
            if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) {
              oneExhaustedInCurrentDoc = true;
              return NO_MORE_POSITIONS;
      Spans big = containerContained.get(0);
      Spans little = containerContained.get(1);

      return new ContainSpans(big, little, little) {

        @Override
        boolean twoPhaseCurrentDocMatches() throws IOException {
          oneExhaustedInCurrentDoc = false;
          assert littleSpans.startPosition() == -1;
          while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) {
            while (bigSpans.endPosition() < littleSpans.endPosition()) {
              if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) {
                oneExhaustedInCurrentDoc = true;
                return false;
              }
            }
            if (bigSpans.startPosition() <= littleSpans.startPosition()) {
              atFirstInCurrentDoc = true;
              return true;
            }
          }
          if (bigSpans.startPosition() <= littleSpans.startPosition()) {
          oneExhaustedInCurrentDoc = true;
          return false;
        }

        @Override
        public int nextStartPosition() throws IOException {
          if (atFirstInCurrentDoc) {
            atFirstInCurrentDoc = false;
            return littleSpans.startPosition();
          }
          while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) {
            while (bigSpans.endPosition() < littleSpans.endPosition()) {
              if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) {
                oneExhaustedInCurrentDoc = true;
                return NO_MORE_POSITIONS;
              }
            }
            if (bigSpans.startPosition() <= littleSpans.startPosition()) {
              return littleSpans.startPosition();
            }
          }
          oneExhaustedInCurrentDoc = true;
          return NO_MORE_POSITIONS;
        }
        oneExhaustedInCurrentDoc = true;
        return NO_MORE_POSITIONS;
      }
      };
    };
  }
  }

}
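For context, a hedged usage sketch of the query itself (the field, terms and slop are invented): keep occurrences of the "little" span only where they fall inside the "big" span.

    // Find "quick" only where it occurs inside a window bounded by "lazy" ... "dog".
    SpanQuery big = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term("body", "lazy")),
        new SpanTermQuery(new Term("body", "dog"))
      }, 5, true);
    SpanQuery little = new SpanTermQuery(new Term("body", "quick"));
    SpanWithinQuery q = new SpanWithinQuery(big, little);
    TopDocs hits = searcher.search(q, 10);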
@@ -17,15 +17,10 @@ package org.apache.lucene.search.spans;
 * limitations under the License.
 */

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

/**
 * Holds all implementations of classes in the o.a.l.s.spans package as a

@@ -83,18 +78,13 @@ final class JustCompileSearchSpans {

  static final class JustCompileSpanQuery extends SpanQuery {

    @Override
    protected void extractTerms(Set<Term> terms) {
      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
    }

    @Override
    public String getField() {
      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
    }

    @Override
    public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) {
    public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
    }

@@ -21,14 +21,10 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

/**
 *

@@ -44,17 +40,14 @@ public class MultiSpansWrapper {
  }

  public static Spans wrap(IndexReader reader, SpanQuery spanQuery, SpanCollector collector) throws IOException {

    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);
    LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing
    LeafReaderContext lrContext = lr.getContext();
    SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly
    HashSet<Term> termSet = new HashSet<>();
    rewrittenQuery.extractTerms(termSet);
    Map<Term,TermContext> termContexts = new HashMap<>();
    for (Term term: termSet) {
      TermContext termContext = TermContext.build(lrContext, term);
      termContexts.put(term, termContext);
    }
    Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts, collector);
    return actSpans;

    SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(spanQuery, false);

    return w.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), collector);
  }
}
@@ -17,9 +17,6 @@ package org.apache.lucene.search.spans;
 * limitations under the License.
 */

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

@@ -36,7 +33,11 @@ import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import static org.apache.lucene.search.spans.SpanTestUtil.*;
import java.util.HashSet;
import java.util.Set;

import static org.apache.lucene.search.spans.SpanTestUtil.assertFinished;
import static org.apache.lucene.search.spans.SpanTestUtil.assertNext;

public class TestFieldMaskingSpanQuery extends LuceneTestCase {

@@ -141,7 +142,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
    QueryUtils.checkEqual(q, qr);

    Set<Term> terms = new HashSet<>();
    qr.extractTerms(terms);
    qr.createWeight(searcher, false).extractTerms(terms);
    assertEquals(1, terms.size());
  }

@@ -161,7 +162,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
    QueryUtils.checkUnequal(q, qr);

    Set<Term> terms = new HashSet<>();
    qr.extractTerms(terms);
    qr.createWeight(searcher, false).extractTerms(terms);
    assertEquals(2, terms.size());
  }

@@ -29,7 +29,6 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.CommonTermsQuery;

@@ -68,7 +67,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;


/**

@@ -301,15 +299,9 @@ public class WeightedSpanTermExtractor {
      q = spanQuery;
    }
    LeafReaderContext context = getLeafContext();
    Map<Term,TermContext> termContexts = new HashMap<>();
    TreeSet<Term> extractedTerms = new TreeSet<>();
    SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(q, false);
    w.extractTerms(extractedTerms);
    for (Term term : extractedTerms) {
      termContexts.put(term, TermContext.build(context, term));
    }
    Bits acceptDocs = context.reader().getLiveDocs();
    final Spans spans = q.getSpans(context, acceptDocs, termContexts, w.getSpanCollectorFactory().newCollector());
    final Spans spans = w.getSpans(context, acceptDocs);
    if (spans == null) {
      return;
    }
@@ -18,16 +18,10 @@ package org.apache.lucene.search.spans;
 */

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

/** Wraps a span query with asserts */
public class AssertingSpanQuery extends SpanQuery {

@@ -37,21 +31,6 @@ public class AssertingSpanQuery extends SpanQuery {
    this.in = in;
  }

  @Override
  protected void extractTerms(Set<Term> terms) {
    in.extractTerms(terms);
  }

  @Override
  public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
    Spans spans = in.getSpans(context, acceptDocs, termContexts, collector);
    if (spans == null) {
      return null;
    } else {
      return new AssertingSpans(spans);
    }
  }

  @Override
  public String getField() {
    return in.getField();

@@ -63,15 +42,9 @@ public class AssertingSpanQuery extends SpanQuery {
  }

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    // TODO: we are wasteful and createWeight twice in this case... use VirtualMethod?
    // we need to not wrap if the query is e.g. a Payload one that overrides this (it should really be final)
    SpanWeight weight = in.createWeight(searcher, needsScores);
    if (weight.getClass() == SpanWeight.class) {
      return super.createWeight(searcher, needsScores);
    } else {
      return weight;
    }
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, SpanCollectorFactory factory) throws IOException {
    SpanWeight weight = in.createWeight(searcher, needsScores, factory);
    return new AssertingSpanWeight(weight);
  }

  @Override
@@ -0,0 +1,63 @@
package org.apache.lucene.search.spans;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

/**
 * Wraps a SpanWeight with additional asserts
 */
public class AssertingSpanWeight extends SpanWeight {

  final SpanWeight in;

  /**
   * Create an AssertingSpanWeight
   * @param in the SpanWeight to wrap
   * @throws IOException on error
   */
  public AssertingSpanWeight(SpanWeight in) throws IOException {
    super((SpanQuery) in.getQuery(), in.similarity, in.collectorFactory);
    this.in = in;
  }

  @Override
  public void extractTermContexts(Map<Term, TermContext> contexts) {
    in.extractTermContexts(contexts);
  }

  @Override
  public Spans getSpans(LeafReaderContext context, Bits liveDocs, SpanCollector collector) throws IOException {
    Spans spans = in.getSpans(context, liveDocs, collector);
    if (spans == null)
      return null;
    return new AssertingSpans(spans);
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    in.extractTerms(terms);
  }
}