LUCENE-6845: Merge SpanScorer into Spans

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1709964 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Alan Woodward 2015-10-22 09:16:03 +00:00
parent 55261556c7
commit 5969426d82
33 changed files with 381 additions and 403 deletions

View File

@ -271,6 +271,8 @@ Other
* LUCENE-6467: Simplify Query.equals. (Paul Elschot via Adrien Grand)
* LUCENE-6845: SpanScorer is now merged into Spans (Alan Woodward, David Smiley)
Build
* LUCENE-6732: Improve checker for invalid source patterns to also

View File

@ -19,8 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.search.spans.Spans;
/**
* This abstract class defines methods to iterate over a set of non-decreasing
* doc ids. Note that this class assumes it iterates on doc Ids, and therefore

View File

@ -20,11 +20,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.search.spans.Spans;
/**
* Returned by {@link Scorer#asTwoPhaseIterator()}
* and {@link Spans#asTwoPhaseIterator()}
* to expose an approximation of a {@link DocIdSetIterator}.
* When the {@link #approximation()}'s
* {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
@ -100,8 +97,6 @@ public abstract class TwoPhaseIterator {
public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
return (iter instanceof Scorer)
? ((Scorer) iter).asTwoPhaseIterator()
: (iter instanceof Spans)
? ((Spans) iter).asTwoPhaseIterator()
: null;
}

View File

@ -23,6 +23,7 @@ import java.util.List;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.ConjunctionDISI;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;
/**
* Common super class for multiple sub spans required in a document.
@ -33,7 +34,8 @@ abstract class ConjunctionSpans extends Spans {
boolean atFirstInCurrentDoc; // a first start position is available in current doc for nextStartPosition
boolean oneExhaustedInCurrentDoc; // one subspans exhausted in current doc
ConjunctionSpans(List<Spans> subSpans) {
ConjunctionSpans(List<Spans> subSpans, SpanWeight weight, Similarity.SimScorer docScorer) {
super(weight, docScorer);
if (subSpans.size() < 2) {
throw new IllegalArgumentException("Less than 2 subSpans.size():" + subSpans.size());
}

View File

@ -21,13 +21,15 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.search.similarities.Similarity;
abstract class ContainSpans extends ConjunctionSpans {
Spans sourceSpans;
Spans bigSpans;
Spans littleSpans;
ContainSpans(Spans bigSpans, Spans littleSpans, Spans sourceSpans) {
super(Arrays.asList(bigSpans, littleSpans));
ContainSpans(SpanWeight weight, Similarity.SimScorer simScorer, Spans bigSpans, Spans littleSpans, Spans sourceSpans) {
super(Arrays.asList(bigSpans, littleSpans), weight, simScorer);
this.bigSpans = Objects.requireNonNull(bigSpans);
this.littleSpans = Objects.requireNonNull(littleSpans);
this.sourceSpans = Objects.requireNonNull(sourceSpans);

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;
/**
* A {@link Spans} implementation wrapping another spans instance,
@ -35,7 +36,8 @@ public abstract class FilterSpans extends Spans {
private int startPos = -1;
/** Wrap the given {@link Spans}. */
protected FilterSpans(Spans in) {
protected FilterSpans(Spans in, Similarity.SimScorer docScorer) {
super((SpanWeight)in.getWeight(), docScorer);
this.in = Objects.requireNonNull(in);
}
@ -181,7 +183,7 @@ public abstract class FilterSpans extends Spans {
}
}
}
/**
* Status returned from {@link FilterSpans#accept(Spans)} that indicates
* whether a candidate match should be accepted, rejected, or rejected

View File

@ -1,34 +0,0 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
/**
 * Shared base for the ordered and unordered near-spans implementations:
 * a conjunction of sub-spans bounded by the slop of the originating
 * {@link SpanNearQuery}.
 */
abstract class NearSpans extends ConjunctionSpans {

  /** Maximum slop allowed between the sub-span matches, read from the query. */
  final int allowedSlop;

  /** The near query these spans were created for. */
  final SpanNearQuery query;

  /**
   * @param query    the near query supplying the slop; a null query fails
   *                 with a NullPointerException when the slop is read
   * @param subSpans the sub-spans to combine, passed on to {@link ConjunctionSpans}
   */
  NearSpans(SpanNearQuery query, List<Spans> subSpans) {
    super(subSpans);
    this.allowedSlop = query.getSlop();
    this.query = query;
  }
}

View File

@ -20,6 +20,8 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.search.similarities.Similarity;
/**
* A Spans that is formed from the ordered subspans of a SpanNearQuery
* where the subspans do not overlap and have a maximum slop between them.
@ -42,15 +44,18 @@ import java.util.List;
* Expert:
* Only public for subclassing. Most implementations should not need this class
*/
public class NearSpansOrdered extends NearSpans {
public class NearSpansOrdered extends ConjunctionSpans {
protected int matchStart = -1;
protected int matchEnd = -1;
protected int matchWidth = -1;
public NearSpansOrdered(SpanNearQuery query, List<Spans> subSpans) throws IOException {
super(query, subSpans);
private final int allowedSlop;
public NearSpansOrdered(SpanWeight weight, int allowedSlop, List<Spans> subSpans, Similarity.SimScorer simScorer) throws IOException {
super(subSpans, weight, simScorer);
this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1
this.allowedSlop = allowedSlop;
}
@Override
@ -149,7 +154,7 @@ public class NearSpansOrdered extends NearSpans {
@Override
public String toString() {
return "NearSpansOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
return "NearSpansOrdered("+weight.getQuery().toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
}
}

View File

@ -17,28 +17,30 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.PriorityQueue;
/**
* Similar to {@link NearSpansOrdered}, but for the unordered case.
*
* Expert:
* Only public for subclassing. Most implementations should not need this class
*/
public class NearSpansUnordered extends NearSpans {
public class NearSpansUnordered extends ConjunctionSpans {
private List<SpansCell> subSpanCells; // in query order
private final int allowedSlop;
private SpanPositionQueue spanPositionQueue;
public NearSpansUnordered(SpanNearQuery query, List<Spans> subSpans)
public NearSpansUnordered(SpanWeight weight, int allowedSlop, List<Spans> subSpans, Similarity.SimScorer simScorer)
throws IOException {
super(query, subSpans);
super(subSpans, weight, simScorer);
this.subSpanCells = new ArrayList<>(subSpans.size());
for (Spans subSpan : subSpans) { // sub spans in query order
@ -46,6 +48,7 @@ public class NearSpansUnordered extends NearSpans {
}
spanPositionQueue = new SpanPositionQueue(subSpans.size());
singleCellToPositionQueue(); // -1 startPosition/endPosition also at doc -1
this.allowedSlop = allowedSlop;
}
private void singleCellToPositionQueue() {
@ -74,6 +77,7 @@ public class NearSpansUnordered extends NearSpans {
final Spans in;
public SpansCell(Spans spans) {
super((SpanWeight) NearSpansUnordered.this.weight, NearSpansUnordered.this.docScorer);
this.in = spans;
}
@ -172,7 +176,7 @@ public class NearSpansUnordered extends NearSpans {
* or the spans start at the same position,
* and spans1 ends before spans2.
*/
static final boolean positionsOrdered(Spans spans1, Spans spans2) {
static boolean positionsOrdered(Spans spans1, Spans spans2) {
assert spans1.docID() == spans2.docID() : "doc1 " + spans1.docID() + " != doc2 " + spans2.docID();
int start1 = spans1.startPosition();
int start2 = spans2.startPosition();
@ -261,10 +265,10 @@ public class NearSpansUnordered extends NearSpans {
@Override
public String toString() {
if (minPositionCell() != null) {
return getClass().getName() + "("+query.toString()+")@"+
return getClass().getName() + "("+weight.getQuery().toString()+")@"+
(docID()+":"+startPosition()+"-"+endPosition());
} else {
return getClass().getName() + "("+query.toString()+")@ ?START?";
return getClass().getName() + "("+weight.getQuery().toString()+")@ ?START?";
}
}
}

View File

@ -0,0 +1,85 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;
/**
 * A {@link Spans} that delegates all iteration to a wrapped {@link Spans}
 * while scoring with a different {@link Similarity.SimScorer}.
 */
public class ScoringWrapperSpans extends Spans {

  /** The wrapped spans; every document and position call is forwarded to it. */
  private final Spans in;

  /**
   * Creates a new ScoringWrapperSpans
   * @param spans the Spans to wrap; its weight is reused for this wrapper
   * @param simScorer the SimScorer to use for scoring
   */
  public ScoringWrapperSpans(Spans spans, Similarity.SimScorer simScorer) {
    super((SpanWeight) spans.getWeight(), simScorer);
    this.in = spans;
  }

  @Override
  public int nextStartPosition() throws IOException {
    return in.nextStartPosition();
  }

  @Override
  public int startPosition() {
    return in.startPosition();
  }

  @Override
  public int endPosition() {
    return in.endPosition();
  }

  @Override
  public int width() {
    return in.width();
  }

  @Override
  public void collect(SpanCollector collector) throws IOException {
    in.collect(collector);
  }

  @Override
  public int docID() {
    return in.docID();
  }

  @Override
  public int nextDoc() throws IOException {
    return in.nextDoc();
  }

  @Override
  public int advance(int target) throws IOException {
    return in.advance(target);
  }

  @Override
  public long cost() {
    return in.cost();
  }

  /**
   * Returns the two-phase view of the wrapped spans, or {@code null} if the
   * wrapped spans does not provide one.
   */
  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    // Without this delegate the wrapper would hide any approximation the
    // wrapped spans offers. Document state is read through in.docID(), so
    // advancing the wrapped approximation keeps this wrapper in sync.
    return in.asTwoPhaseIterator();
  }
}

View File

@ -17,16 +17,16 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
/** Keep matches that contain another Spans. */
/** Keep matches that contain another Spans. */
public final class SpanContainingQuery extends SpanContainQuery {
/** Construct a SpanContainingQuery matching spans from <code>big</code>
* that contain at least one spans from <code>little</code>.
@ -71,7 +71,7 @@ public final class SpanContainingQuery extends SpanContainQuery {
Spans big = containerContained.get(0);
Spans little = containerContained.get(1);
return new ContainSpans(big, little, big) {
return new ContainSpans(this, getSimScorer(context), big, little, big) {
@Override
boolean twoPhaseCurrentDocMatches() throws IOException {

View File

@ -33,7 +33,6 @@ import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.ToStringUtils;
/** Matches spans which are near one another. One can specify <i>slop</i>, the
* maximum number of intervening unmatched positions, as well as whether
@ -220,8 +219,8 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
}
// all NearSpans require at least two subSpans
return (!inOrder) ? new NearSpansUnordered(SpanNearQuery.this, subSpans)
: new NearSpansOrdered(SpanNearQuery.this, subSpans);
return (!inOrder) ? new NearSpansUnordered(this, slop, subSpans, getSimScorer(context))
: new NearSpansOrdered(this, slop, subSpans, getSimScorer(context));
}
@Override
@ -331,6 +330,7 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
final int width;
GapSpans(int width) {
super(null, null);
this.width = width;
}

View File

@ -17,6 +17,11 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -25,12 +30,6 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/** Removes matches which overlap with another SpanQuery or which are
* within x tokens before or y tokens after another SpanQuery.
@ -127,13 +126,13 @@ public final class SpanNotQuery extends SpanQuery {
Spans excludeSpans = excludeWeight.getSpans(context, requiredPostings);
if (excludeSpans == null) {
return includeSpans;
return new ScoringWrapperSpans(includeSpans, getSimScorer(context));
}
TwoPhaseIterator excludeTwoPhase = excludeSpans.asTwoPhaseIterator();
DocIdSetIterator excludeApproximation = excludeTwoPhase == null ? null : excludeTwoPhase.approximation();
return new FilterSpans(includeSpans) {
return new FilterSpans(includeSpans, getSimScorer(context)) {
// last document we have checked matches() against for the exclusion, and failed
// when using approximations, so we don't call it again, and pass thru all inclusions.
int lastApproxDoc = -1;

View File

@ -17,6 +17,13 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -28,13 +35,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TwoPhaseIterator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/** Matches the union of its clauses.
*/
@ -166,7 +166,7 @@ public final class SpanOrQuery extends SpanQuery {
if (subSpans.size() == 0) {
return null;
} else if (subSpans.size() == 1) {
return subSpans.get(0);
return new ScoringWrapperSpans(subSpans.get(0), getSimScorer(context));
}
DisiPriorityQueue<Spans> byDocQueue = new DisiPriorityQueue<>(subSpans.size());
@ -176,7 +176,7 @@ public final class SpanOrQuery extends SpanQuery {
SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1
return new Spans() {
return new Spans(this, getSimScorer(context)) {
Spans topPositionSpans = null;
@Override

View File

@ -17,6 +17,11 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -25,11 +30,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/**
* Base class for filtering a SpanQuery based on the position of a match.
@ -94,7 +94,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
@Override
public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
Spans matchSpans = matchWeight.getSpans(context, requiredPostings);
return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
return (matchSpans == null) ? null : new FilterSpans(matchSpans, getSimScorer(context)) {
@Override
protected AcceptStatus accept(Spans candidate) throws IOException {
return acceptPosition(candidate);

View File

@ -1,165 +0,0 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;
/**
 * A {@link Scorer} driven by an underlying {@link Spans}: document iteration
 * is forwarded to the spans, and frequency/score are derived lazily from the
 * span positions of the current document.
 * Public for extension only.
 */
public class SpanScorer extends Scorer {

  /** underlying spans we are scoring from */
  protected final Spans spans;

  /** similarity used in default score impl */
  protected final Similarity.SimScorer docScorer;

  /** accumulated sloppy freq (computed in setFreqCurrentDoc) */
  protected float freq;

  /** number of matches (computed in setFreqCurrentDoc) */
  protected int numMatches;

  // document for which setFreqCurrentDoc() was last run; -1 before any scoring
  private int lastScoredDoc = -1;

  /**
   * Creates a new SpanScorer
   * @param spans the spans to score from; must not be null
   * @param weight the weight passed to the {@link Scorer} constructor
   * @param docScorer the similarity scorer; when null, frequency computation
   *                  stops after the first match of a document
   * @lucene.internal
   */
  public SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
    super(weight);
    this.docScorer = docScorer;
    this.spans = Objects.requireNonNull(spans);
  }

  @Override
  public final int nextDoc() throws IOException {
    return spans.nextDoc();
  }

  @Override
  public final int advance(int target) throws IOException {
    return spans.advance(target);
  }

  /**
   * Runs setFreqCurrentDoc for the current document unless that has already
   * happened for this document.
   */
  private final void ensureFreq() throws IOException {
    final int doc = spans.docID();
    if (doc == lastScoredDoc) {
      return; // already computed for this document
    }
    setFreqCurrentDoc();
    lastScoredDoc = doc;
  }

  /**
   * Walks the positions of the current document and fills in
   * {@link #freq} and {@link #numMatches}.
   * <p>
   * This will be called at most once per document.
   */
  protected final void setFreqCurrentDoc() throws IOException {
    numMatches = 0;
    freq = 0.0f;

    doStartCurrentDoc();

    assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans;
    assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans;

    int lastStart = -1;
    int lastEnd = -1;

    int start = spans.nextStartPosition();
    assert start != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;

    while (true) {
      assert start >= lastStart;
      final int end = spans.endPosition();
      assert end != Spans.NO_MORE_POSITIONS;
      // Equal end positions at the same start are tolerated: a strictly
      // increasing check can fail for Or spans on the same term.
      assert (start != lastStart) || (end >= lastEnd) : "decreased endPos="+end;

      numMatches++;
      if (docScorer == null) {
        // scores not required, one match is enough
        freq = 1;
        return;
      }
      freq += docScorer.computeSlopFactor(spans.width());
      doCurrentSpans();

      lastStart = start;
      lastEnd = end;
      start = spans.nextStartPosition();
      if (start == Spans.NO_MORE_POSITIONS) {
        break;
      }
    }

    assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
    assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
  }

  /**
   * Hook invoked before the frequency of the current doc is computed.
   * The default implementation does nothing.
   */
  protected void doStartCurrentDoc() throws IOException {}

  /**
   * Hook invoked for each span position consumed during frequency
   * calculation. The default implementation does nothing.
   */
  protected void doCurrentSpans() throws IOException {}

  /**
   * Score the current doc. The default implementation hands the current doc
   * id and the slop-adjusted {@link #freq} to the similarity scorer.
   */
  protected float scoreCurrentDoc() throws IOException {
    final int doc = spans.docID();
    return docScorer.score(doc, freq);
  }

  @Override
  public final int docID() {
    return spans.docID();
  }

  @Override
  public final float score() throws IOException {
    ensureFreq();
    return scoreCurrentDoc();
  }

  @Override
  public final int freq() throws IOException {
    ensureFreq();
    return numMatches;
  }

  /** Returns the intermediate "sloppy freq" adjusted for edit distance
   * @lucene.internal */
  // only public so .payloads can see it.
  public final float sloppyFreq() throws IOException {
    ensureFreq();
    return freq;
  }

  @Override
  public final long cost() {
    return spans.cost();
  }

  @Override
  public final TwoPhaseIterator asTwoPhaseIterator() {
    return spans.asTwoPhaseIterator();
  }
}

View File

@ -17,6 +17,12 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
@ -27,13 +33,6 @@ import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/** Matches spans containing a term.
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
@ -118,7 +117,7 @@ public class SpanTermQuery extends SpanQuery {
termsEnum.seekExact(term.bytes(), state);
final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
return new TermSpans(postings, term);
return new TermSpans(this, getSimScorer(context), postings, term);
}
}

View File

@ -24,7 +24,6 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
@ -132,25 +131,22 @@ public abstract class SpanWeight extends Weight {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (field == null) {
return null;
}
Terms terms = context.reader().terms(field);
if (terms != null && terms.hasPositions() == false) {
throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
}
Spans spans = getSpans(context, Postings.POSITIONS);
Similarity.SimScorer simScorer = getSimScorer(context);
return (spans == null) ? null : new SpanScorer(spans, this, simScorer);
return getSpans(context, Postings.POSITIONS);
}
/**
* Return a SimScorer for this context
* @param context the LeafReaderContext
* @return a SimScorer for the given context, or {@code null} if this weight has no SimWeight
* @throws IOException on error
*/
public Similarity.SimScorer getSimScorer(LeafReaderContext context) throws IOException {
return simWeight == null ? null : similarity.simScorer(simWeight, context);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SpanScorer scorer = (SpanScorer) scorer(context);
Spans scorer = (Spans) scorer(context);
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {

View File

@ -17,15 +17,15 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
/** Keep matches that are contained within another Spans. */
public final class SpanWithinQuery extends SpanContainQuery {
@ -72,7 +72,7 @@ public final class SpanWithinQuery extends SpanContainQuery {
Spans big = containerContained.get(0);
Spans little = containerContained.get(1);
return new ContainSpans(big, little, little) {
return new ContainSpans(this, getSimScorer(context), big, little, little) {
@Override
boolean twoPhaseCurrentDocMatches() throws IOException {

View File

@ -19,8 +19,8 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/** Iterates through combinations of start/end positions per-doc.
@ -28,9 +28,24 @@ import org.apache.lucene.search.similarities.Similarity.SimScorer;
* These are enumerated in order, by increasing document number, within that by
* increasing start position and finally by increasing end position.
*/
public abstract class Spans extends DocIdSetIterator {
public abstract class Spans extends Scorer {
public static final int NO_MORE_POSITIONS = Integer.MAX_VALUE;
protected final Similarity.SimScorer docScorer;
protected Spans(SpanWeight weight, SimScorer docScorer) {
super(weight);
this.docScorer = docScorer;
}
/** accumulated sloppy freq (computed in setFreqCurrentDoc) */
protected float freq;
/** number of matches (computed in setFreqCurrentDoc) */
protected int numMatches;
private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
/**
* Returns the next start position for the current doc.
* There is always at least one start/end position per doc.
@ -71,26 +86,6 @@ public abstract class Spans extends DocIdSetIterator {
*/
public abstract void collect(SpanCollector collector) throws IOException;
/**
* Optional method: Return a {@link TwoPhaseIterator} view of this
* {@link Spans}. A return value of {@code null} indicates that
* two-phase iteration is not supported.
*
* Note that the returned {@link TwoPhaseIterator}'s
* {@link TwoPhaseIterator#approximation() approximation} must
* advance documents synchronously with this iterator:
* advancing the approximation must
* advance this iterator and vice-versa.
*
* Implementing this method is typically useful on a {@link Spans}
* that has a high per-document overhead for confirming matches.
*
* The default implementation returns {@code null}.
*/
public TwoPhaseIterator asTwoPhaseIterator() {
return null;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@ -103,4 +98,94 @@ public abstract class Spans extends DocIdSetIterator {
return sb.toString();
}
/**
* Ensure setFreqCurrentDoc is called, if not already called for the current doc.
*/
private void ensureFreq() throws IOException {
int currentDoc = docID();
if (lastScoredDoc != currentDoc) {
setFreqCurrentDoc();
lastScoredDoc = currentDoc;
}
}
/**
* Sets {@link #freq} and {@link #numMatches} for the current document.
* <p>
* This will be called at most once per document.
*/
protected final void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;
doStartCurrentDoc();
assert startPosition() == -1 : "incorrect initial start position, " + this.toString();
assert endPosition() == -1 : "incorrect initial end position, " + this.toString();
int prevStartPos = -1;
int prevEndPos = -1;
int startPos = nextStartPosition();
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, " + this.toString();
do {
assert startPos >= prevStartPos;
int endPos = endPosition();
assert endPos != Spans.NO_MORE_POSITIONS;
// This assertion can fail for Or spans on the same term:
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos;
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos;
numMatches++;
if (docScorer == null) { // scores not required, break out here
freq = 1;
return;
}
freq += docScorer.computeSlopFactor(width());
doCurrentSpans();
prevStartPos = startPos;
prevEndPos = endPos;
startPos = nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
assert startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, " + this.toString();
assert endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, " + this.toString();
}
/**
* Called before the current doc's frequency is calculated
*/
protected void doStartCurrentDoc() throws IOException {}
/**
* Called each time this Spans is advanced during frequency calculation
*/
protected void doCurrentSpans() throws IOException {}
/**
* Score the current doc. The default implementation scores the doc
* with the similarity using the slop-adjusted {@link #freq}.
*/
protected float scoreCurrentDoc() throws IOException {
assert docScorer != null : getClass() + " has a null docScorer!";
return docScorer.score(docID(), freq);
}
@Override
public final float score() throws IOException {
ensureFreq();
return scoreCurrentDoc();
}
@Override
public final int freq() throws IOException {
ensureFreq();
return numMatches;
}
/** Returns the intermediate "sloppy freq" adjusted for edit distance
* @lucene.internal */
final float sloppyFreq() throws IOException {
ensureFreq();
return freq;
}
}

View File

@ -16,12 +16,13 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.search.similarities.Similarity;
/**
* Expert:
@ -37,7 +38,8 @@ public class TermSpans extends Spans {
protected int position;
protected boolean readPayload;
public TermSpans(PostingsEnum postings, Term term) {
public TermSpans(SpanWeight weight, Similarity.SimScorer scorer, PostingsEnum postings, Term term) {
super(weight, scorer);
this.postings = Objects.requireNonNull(postings);
this.term = Objects.requireNonNull(term);
this.doc = -1;

View File

@ -42,9 +42,9 @@ import org.apache.lucene.search.spans.MultiSpansWrapper;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.Similarity;
/**
* Holds all implementations of classes in the o.a.l.s.spans package as a
@ -35,6 +34,10 @@ final class JustCompileSearchSpans {
static final class JustCompileSpans extends Spans {
// Passes null for both superclass constructor arguments
// (presumably the weight and the doc scorer — TODO confirm against Spans).
JustCompileSpans() {
super(null, null);
}
// Stub: always throws UnsupportedOperationException.
@Override
public int docID() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@ -100,65 +103,4 @@ final class JustCompileSearchSpans {
}
/**
 * Stub {@link Spans} subclass with no working behavior: every method throws
 * {@link UnsupportedOperationException}, except {@code collect}, which is an
 * empty no-op.
 */
static final class JustCompilePayloadSpans extends Spans {
@Override
public int docID() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int nextDoc() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int startPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int endPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int width() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
// NOTE(review): unlike the sibling stubs this silently does nothing instead
// of throwing — confirm the no-op is intentional.
@Override
public void collect(SpanCollector collector) throws IOException {
}
@Override
public int nextStartPosition() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public long cost() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
/**
 * Stub {@code SpanScorer} subclass: the constructor only forwards its
 * arguments to the superclass, and {@code scoreCurrentDoc} always throws
 * {@link UnsupportedOperationException}.
 */
static final class JustCompileSpanScorer extends SpanScorer {
// Forwards all three arguments to the superclass constructor unchanged.
protected JustCompileSpanScorer(Spans spans, SpanWeight weight,
Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
}
// Stub: always throws UnsupportedOperationException.
@Override
protected float scoreCurrentDoc() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
}

View File

@ -31,6 +31,8 @@ public class TestFilterSpans extends LuceneTestCase {
// verify that all methods of Spans are overridden by FilterSpans,
// except those under the 'exclude' list
Set<Method> exclude = new HashSet<>();
exclude.add(FilterSpans.class.getMethod("freq"));
exclude.add(FilterSpans.class.getMethod("score"));
for (Method m : FilterSpans.class.getMethods()) {
if (m.getDeclaringClass() == Spans.class) {
assertTrue("method " + m.getName() + " not overridden!", exclude.contains(m));

View File

@ -63,9 +63,9 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;

View File

@ -17,6 +17,7 @@ package org.apache.lucene.queries.payloads;
*/
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.spans.Spans;
/**
* An abstract class that defines a way for PayloadScoreQuery instances to transform
@ -41,7 +42,7 @@ public abstract class PayloadFunction {
* @param currentPayloadScore The score for the current payload
* @return The new current Score
*
* @see org.apache.lucene.search.spans.Spans
* @see Spans
*/
public abstract float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore);

View File

@ -32,7 +32,6 @@ import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;
@ -137,7 +136,7 @@ public class PayloadScoreQuery extends SpanQuery {
Spans spans = getSpans(context, Postings.PAYLOADS);
if (spans == null)
return null;
return new PayloadSpanScorer(spans, this, innerWeight.getSimScorer(context));
return new PayloadSpans(spans, this, innerWeight.getSimScorer(context));
}
@Override
@ -157,7 +156,7 @@ public class PayloadScoreQuery extends SpanQuery {
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
PayloadSpanScorer scorer = (PayloadSpanScorer) scorer(context);
PayloadSpans scorer = (PayloadSpans) scorer(context);
if (scorer == null || scorer.advance(doc) != doc)
return Explanation.noMatch("No match");
@ -174,13 +173,40 @@ public class PayloadScoreQuery extends SpanQuery {
}
}
private class PayloadSpanScorer extends SpanScorer implements SpanCollector {
private class PayloadSpans extends Spans implements SpanCollector {
private int payloadsSeen;
private float payloadScore;
private final Spans in;
private PayloadSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
private PayloadSpans(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(weight, docScorer);
this.in = spans;
}
// The position accessors and collect() below forward unchanged to the
// wrapped Spans held in the field "in".

/** Forwards to {@code in.nextStartPosition()}. */
@Override
public int nextStartPosition() throws IOException {
return in.nextStartPosition();
}
/** Forwards to {@code in.startPosition()}. */
@Override
public int startPosition() {
return in.startPosition();
}
/** Forwards to {@code in.endPosition()}. */
@Override
public int endPosition() {
return in.endPosition();
}
/** Forwards to {@code in.width()}. */
@Override
public int width() {
return in.width();
}
/** Passes the given collector straight to {@code in.collect(collector)}. */
@Override
public void collect(SpanCollector collector) throws IOException {
in.collect(collector);
}
@Override
@ -191,7 +217,7 @@ public class PayloadScoreQuery extends SpanQuery {
@Override
protected void doCurrentSpans() throws IOException {
spans.collect(this);
in.collect(this);
}
@Override
@ -199,8 +225,8 @@ public class PayloadScoreQuery extends SpanQuery {
BytesRef payload = postings.getPayload();
if (payload == null)
return;
float payloadFactor = docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload);
payloadScore = function.currentScore(docID(), getField(), spans.startPosition(), spans.endPosition(),
float payloadFactor = docScorer.computePayloadFactor(docID(), in.startPosition(), in.endPosition(), payload);
payloadScore = function.currentScore(docID(), getField(), in.startPosition(), in.endPosition(),
payloadsSeen, payloadScore, payloadFactor);
payloadsSeen++;
}
@ -228,6 +254,26 @@ public class PayloadScoreQuery extends SpanQuery {
public void reset() {
}
// The document-iteration methods below forward unchanged to the wrapped
// Spans held in the field "in".

/** Forwards to {@code in.docID()}. */
@Override
public int docID() {
return in.docID();
}
/** Forwards to {@code in.nextDoc()}. */
@Override
public int nextDoc() throws IOException {
return in.nextDoc();
}
/** Forwards to {@code in.advance(target)} with the same target. */
@Override
public int advance(int target) throws IOException {
return in.advance(target);
}
/** Forwards to {@code in.cost()}. */
@Override
public long cost() {
return in.cost();
}
}
}

View File

@ -28,12 +28,10 @@ import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.FilterSpans;
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;
@ -92,7 +90,7 @@ public class SpanPayloadCheckQuery extends SpanQuery {
public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
final PayloadChecker collector = new PayloadChecker();
Spans matchSpans = matchWeight.getSpans(context, requiredPostings.atLeast(Postings.PAYLOADS));
return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
return (matchSpans == null) ? null : new FilterSpans(matchSpans, getSimScorer(context)) {
@Override
protected AcceptStatus accept(Spans candidate) throws IOException {
collector.reset();
@ -112,9 +110,7 @@ public class SpanPayloadCheckQuery extends SpanQuery {
throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
}
Spans spans = getSpans(context, Postings.PAYLOADS);
Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context);
return (spans == null) ? null : new SpanScorer(spans, this, simScorer);
return getSpans(context, Postings.PAYLOADS);
}
}

View File

@ -48,9 +48,9 @@ import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;

View File

@ -41,8 +41,8 @@ import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.MultiSpansWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.English;

View File

@ -38,9 +38,9 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
/**
* Experimental class to get set of payloads for most standard Lucene queries.

View File

@ -17,16 +17,17 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.search.similarities.Similarity;
/**
* Wraps a SpanWeight with additional asserts
@ -55,7 +56,12 @@ public class AssertingSpanWeight extends SpanWeight {
Spans spans = in.getSpans(context, requiredPostings);
if (spans == null)
return null;
return new AssertingSpans(spans);
return new AssertingSpans(spans, in.getSimScorer(context));
}
/**
 * Returns the result of {@code in.getSimScorer(context)} for the same
 * context, without any additional checks.
 */
@Override
public Similarity.SimScorer getSimScorer(LeafReaderContext context) throws IOException {
return in.getSimScorer(context);
}
@Override

View File

@ -17,13 +17,14 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;
/**
* Wraps a Spans with additional asserts
* Wraps a Spans with additional asserts
*/
class AssertingSpans extends Spans {
final Spans in;
@ -67,7 +68,8 @@ class AssertingSpans extends Spans {
State state = State.DOC_START;
AssertingSpans(Spans in) {
AssertingSpans(Spans in, Similarity.SimScorer docScorer) {
super((SpanWeight)in.getWeight(), docScorer);
this.in = in;
}
@ -187,7 +189,13 @@ class AssertingSpans extends Spans {
public long cost() {
return in.cost();
}
/**
 * Scores through the wrapped Spans {@code in}: first asserts that
 * {@code in.docScorer} is non-null, then returns
 * {@code in.scoreCurrentDoc()}.
 */
@Override
protected float scoreCurrentDoc() throws IOException {
assert in.docScorer != null : in.getClass() + " has no docScorer!";
return in.scoreCurrentDoc();
}
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
final TwoPhaseIterator iterator = in.asTwoPhaseIterator();
@ -196,7 +204,7 @@ class AssertingSpans extends Spans {
}
return new AssertingTwoPhaseView(iterator);
}
class AssertingTwoPhaseView extends TwoPhaseIterator {
final TwoPhaseIterator in;
int lastDoc = -1;