LUCENE-6308: cutover Spans to DISI, reuse ConjunctionDISI, use two-phased iteration

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670272 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-03-31 08:27:45 +00:00
parent 03f4970074
commit d3cfba9b29
38 changed files with 1767 additions and 1621 deletions

View File

@ -32,6 +32,15 @@ API Changes
* LUCENE-6067: Accountable.getChildResources has a default
implementation returning the empty list. (Robert Muir)
======================= Lucene 5.2.0 =======================
New Features
* LUCENE-6308: Span queries now share document conjunction/intersection
code with boolean queries, and use two-phased iterators for
faster intersection by avoiding loading positions in certain cases.
(Paul Elschot, Robert Muir via Mike McCandless)
======================= Lucene 5.1.0 =======================
New Features

View File

@ -23,8 +23,14 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.search.spans.Spans;
class ConjunctionDISI extends DocIdSetIterator {
/** A conjunction of DocIdSetIterators.
* This iterates over the doc ids that are present in each given DocIdSetIterator.
* <br>Public only for use in {@link org.apache.lucene.search.spans}.
* @lucene.internal
*/
public class ConjunctionDISI extends DocIdSetIterator {
/** Create a conjunction over the provided iterators, taking advantage of
* {@link TwoPhaseIterator}. */
@ -32,18 +38,16 @@ class ConjunctionDISI extends DocIdSetIterator {
final List<DocIdSetIterator> allIterators = new ArrayList<>();
final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
for (DocIdSetIterator iterator : iterators) {
if (iterator instanceof Scorer) {
// if we have a scorer, check if it supports two-phase iteration
TwoPhaseIterator twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
if (twoPhaseIterator != null) {
// Note:
allIterators.add(twoPhaseIterator.approximation());
twoPhaseIterators.add(twoPhaseIterator);
} else {
allIterators.add(iterator);
}
} else {
// no approximation support, use the iterator as-is
TwoPhaseIterator twoPhaseIterator = null;
if (iterator instanceof Scorer) {
twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
} else if (iterator instanceof Spans) {
twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
}
if (twoPhaseIterator != null) {
allIterators.add(twoPhaseIterator.approximation());
twoPhaseIterators.add(twoPhaseIterator);
} else { // no approximation support, use the iterator as-is
allIterators.add(iterator);
}
}

View File

@ -26,7 +26,6 @@ import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
@ -71,7 +70,7 @@ public class PayloadNearQuery extends SpanNearQuery {
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new PayloadNearSpanWeight(this, searcher);
}
@ -113,7 +112,7 @@ public class PayloadNearQuery extends SpanNearQuery {
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
int result = super.hashCode() ^ getClass().hashCode();
result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode());
result = prime * result + ((function == null) ? 0 : function.hashCode());
return result;
@ -149,8 +148,10 @@ public class PayloadNearQuery extends SpanNearQuery {
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
similarity, similarity.simScorer(stats, context));
Spans spans = query.getSpans(context, acceptDocs, termContexts);
return (spans == null)
? null
: new PayloadNearSpanScorer(spans, this, similarity, similarity.simScorer(stats, context));
}
@Override
@ -188,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery {
protected float payloadScore;
private int payloadsSeen;
protected PayloadNearSpanScorer(Spans spans, Weight weight,
protected PayloadNearSpanScorer(Spans spans, SpanWeight weight,
Similarity similarity, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
this.spans = spans;
@ -200,13 +201,13 @@ public class PayloadNearQuery extends SpanNearQuery {
if (subSpans[i] instanceof NearSpansOrdered) {
if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(),
subSpans[i].start(), subSpans[i].end());
subSpans[i].startPosition(), subSpans[i].endPosition());
}
getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
} else if (subSpans[i] instanceof NearSpansUnordered) {
if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(),
subSpans[i].start(), subSpans[i].end());
subSpans[i].startPosition(), subSpans[i].endPosition());
}
getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
}
@ -233,7 +234,7 @@ public class PayloadNearQuery extends SpanNearQuery {
scratch.length = thePayload.length;
payloadScore = function.currentScore(doc, fieldName, start, end,
payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
spans.start(), spans.end(), scratch));
spans.startPosition(), spans.endPosition(), scratch));
++payloadsSeen;
}
}
@ -241,22 +242,20 @@ public class PayloadNearQuery extends SpanNearQuery {
//
@Override
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
do {
int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength);
Spans[] spansArr = new Spans[1];
spansArr[0] = spans;
getPayloads(spansArr);
more = spans.next();
} while (more && (doc == spans.doc()));
return true;
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
int startPos = spans.nextStartPosition();
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
do {
int matchLength = spans.endPosition() - startPos;
freq += docScorer.computeSlopFactor(matchLength);
Spans[] spansArr = new Spans[1];
spansArr[0] = spans;
getPayloads(spansArr);
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
return true;
}
@Override

View File

@ -169,7 +169,7 @@ public class PayloadSpanUtil {
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps,
inorder);
inorder);
sp.setBoost(query.getBoost());
getPayloads(payloads, sp);
}
@ -186,11 +186,15 @@ public class PayloadSpanUtil {
}
for (LeafReaderContext leafReaderContext : context.leaves()) {
final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts);
while (spans.next() == true) {
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
for (byte [] bytes : payload) {
payloads.add(bytes);
if (spans != null) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
for (byte [] bytes : payload) {
payloads.add(bytes);
}
}
}
}
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads;
*/
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
@ -26,10 +27,10 @@ import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
@ -60,14 +61,14 @@ public class PayloadTermQuery extends SpanTermQuery {
}
public PayloadTermQuery(Term term, PayloadFunction function,
boolean includeSpanScore) {
boolean includeSpanScore) {
super(term);
this.function = function;
this.function = Objects.requireNonNull(function);
this.includeSpanScore = includeSpanScore;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new PayloadTermWeight(this, searcher);
}
@ -79,9 +80,11 @@ public class PayloadTermQuery extends SpanTermQuery {
}
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
this, similarity.simScorer(stats, context));
public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts);
return (spans == null)
? null
: new PayloadTermSpanScorer(spans, this, similarity.simScorer(stats, context));
}
protected class PayloadTermSpanScorer extends SpanScorer {
@ -90,45 +93,42 @@ public class PayloadTermQuery extends SpanTermQuery {
protected int payloadsSeen;
private final TermSpans termSpans;
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
public PayloadTermSpanScorer(TermSpans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
termSpans = spans;
termSpans = spans; // CHECKME: generics to use SpansScorer.spans as TermSpans.
}
@Override
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
numMatches = 0;
payloadScore = 0;
payloadsSeen = 0;
while (more && doc == spans.doc()) {
int matchLength = spans.end() - spans.start();
int startPos = spans.nextStartPosition();
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
do {
int matchLength = spans.endPosition() - startPos;
freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
processPayload(similarity);
more = spans.next();// this moves positions to the next match in this
// document
}
return more || (freq != 0);
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
return freq != 0;
}
protected void processPayload(Similarity similarity) throws IOException {
if (termSpans.isPayloadAvailable()) {
if (spans.isPayloadAvailable()) {
final PostingsEnum postings = termSpans.getPostings();
payload = postings.getPayload();
if (payload != null) {
payloadScore = function.currentScore(doc, term.field(),
spans.start(), spans.end(), payloadsSeen, payloadScore,
docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
} else {
payloadScore = function.currentScore(doc, term.field(),
spans.start(), spans.end(), payloadsSeen, payloadScore, 1F);
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
}
payloadsSeen++;
@ -176,7 +176,7 @@ public class PayloadTermQuery extends SpanTermQuery {
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs());
PayloadTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
@ -220,7 +220,7 @@ public class PayloadTermQuery extends SpanTermQuery {
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result = prime * result + ((function == null) ? 0 : function.hashCode());
result = prime * result + function.hashCode();
result = prime * result + (includeSpanScore ? 1231 : 1237);
return result;
}
@ -234,14 +234,9 @@ public class PayloadTermQuery extends SpanTermQuery {
if (getClass() != obj.getClass())
return false;
PayloadTermQuery other = (PayloadTermQuery) obj;
if (function == null) {
if (other.function != null)
return false;
} else if (!function.equals(other.function))
return false;
if (includeSpanScore != other.includeSpanScore)
return false;
return true;
return function.equals(other.function);
}
}

View File

@ -106,7 +106,7 @@ public class FieldMaskingSpanQuery extends SpanQuery {
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return maskedQuery.createWeight(searcher, needsScores);
}

View File

@ -19,10 +19,13 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Collection;
import java.util.Objects;
import org.apache.lucene.search.TwoPhaseIterator;
/**
* A {@link Spans} implementation which allows wrapping another spans instance
* and override some selected methods.
* A {@link Spans} implementation wrapping another spans instance,
* allowing selected methods to be overridden in a subclass.
*/
public class FilterSpans extends Spans {
@ -31,32 +34,37 @@ public class FilterSpans extends Spans {
/** Wrap the given {@link Spans}. */
public FilterSpans(Spans in) {
this.in = in;
this.in = Objects.requireNonNull(in);
}
@Override
public boolean next() throws IOException {
return in.next();
public int nextDoc() throws IOException {
return in.nextDoc();
}
@Override
public boolean skipTo(int target) throws IOException {
return in.skipTo(target);
public int advance(int target) throws IOException {
return in.advance(target);
}
@Override
public int doc() {
return in.doc();
public int docID() {
return in.docID();
}
@Override
public int start() {
return in.start();
public int nextStartPosition() throws IOException {
return in.nextStartPosition();
}
@Override
public int end() {
return in.end();
public int startPosition() {
return in.startPosition();
}
@Override
public int endPosition() {
return in.endPosition();
}
@Override
@ -79,4 +87,8 @@ public class FilterSpans extends Spans {
return "Filter(" + in.toString() + ")";
}
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
return in.asTwoPhaseIterator();
}
}

View File

@ -0,0 +1,103 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.ConjunctionDISI;
import org.apache.lucene.search.TwoPhaseIterator;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
/**
 * Common super class for ordered and unordered near Spans.
 * <p>
 * Document-level iteration is delegated to a conjunction over all sub spans
 * (see {@link ConjunctionDISI#intersect}), so only documents present in every
 * sub spans are considered. Subclasses decide, per candidate document, whether
 * the positions of the sub spans actually form a match.
 */
abstract class NearSpans extends Spans {
  SpanNearQuery query;
  int allowedSlop;
  List<Spans> subSpans; // in query order
  DocIdSetIterator conjunction; // used to move to the next doc with all clauses
  boolean atFirstInCurrentDoc;
  boolean oneExhaustedInCurrentDoc; // no more results possible in current doc

  private Spans[] subSpansArray = null; // init only when needed.

  /**
   * @param query the near query these spans belong to; supplies the allowed slop
   * @param subSpans the spans of the query clauses, in query order; at least two
   * @throws NullPointerException if {@code query} or {@code subSpans} is null
   * @throws IllegalArgumentException if fewer than two sub spans are given
   */
  NearSpans(SpanNearQuery query, List<Spans> subSpans)
      throws IOException {
    this.query = Objects.requireNonNull(query, "query");
    this.allowedSlop = query.getSlop();
    // Null-check before the first dereference; checking after size() would never trigger.
    this.subSpans = Objects.requireNonNull(subSpans, "subSpans"); // in query order
    if (subSpans.size() < 2) {
      throw new IllegalArgumentException("Less than 2 subSpans: " + query);
    }
    this.conjunction = ConjunctionDISI.intersect(subSpans);
  }

  @Override
  public int docID() {
    return conjunction.docID();
  }

  @Override
  public long cost() {
    return conjunction.cost();
  }

  /**
   * Moves the conjunction to its next document and lets the subclass turn that
   * candidate into the document to report via {@link #toMatchDoc()}.
   */
  @Override
  public int nextDoc() throws IOException {
    return (conjunction.nextDoc() == NO_MORE_DOCS)
        ? NO_MORE_DOCS
        : toMatchDoc();
  }

  /**
   * Advances the conjunction to {@code target} and lets the subclass turn that
   * candidate into the document to report via {@link #toMatchDoc()}.
   */
  @Override
  public int advance(int target) throws IOException {
    return (conjunction.advance(target) == NO_MORE_DOCS)
        ? NO_MORE_DOCS
        : toMatchDoc();
  }

  /**
   * Called when the conjunction is positioned on a document other than
   * {@code NO_MORE_DOCS}; the returned value is what {@link #nextDoc()} and
   * {@link #advance(int)} report to the caller.
   */
  abstract int toMatchDoc() throws IOException;

  /**
   * Backs {@link TwoPhaseIterator#matches()} of the iterator returned by
   * {@link #asTwoPhaseIterator()}.
   */
  abstract boolean twoPhaseCurrentDocMatches() throws IOException;

  /**
   * Return a {@link TwoPhaseIterator} view of this {@link NearSpans}.
   * The approximation is the conjunction of the sub spans; confirmation of a
   * candidate document is delegated to {@link #twoPhaseCurrentDocMatches()}.
   */
  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    return new TwoPhaseIterator(conjunction) {
      @Override
      public boolean matches() throws IOException {
        return twoPhaseCurrentDocMatches();
      }
    };
  }

  /**
   * Returns the sub spans, in query order, as an array.
   * The array is created on first use and the same instance is returned on later calls.
   */
  public Spans[] getSubSpans() {
    if (subSpansArray == null) {
      subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
    }
    return subSpansArray;
  }
}

View File

@ -17,24 +17,18 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InPlaceMergeSorter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
* where the subspans do not overlap and have a maximum slop between them.
* where the subspans do not overlap and have a maximum slop between them,
* and that does not need to collect payloads.
* To also collect payloads, see {@link NearSpansPayloadOrdered}.
* <p>
* The formed spans only contain minimum slop matches.<br>
* The matching slop is computed from the distance(s) between
@ -55,306 +49,196 @@ import java.util.Set;
* Expert:
* Only public for subclassing. Most implementations should not need this class
*/
public class NearSpansOrdered extends Spans {
private final int allowedSlop;
private boolean firstTime = true;
private boolean more = false;
public class NearSpansOrdered extends NearSpans {
/** The spans in the same order as the SpanNearQuery */
private final Spans[] subSpans;
protected int matchDoc = -1;
protected int matchStart = -1;
protected int matchEnd = -1;
/** Indicates that all subSpans have same doc() */
private boolean inSameDoc = false;
private int matchDoc = -1;
private int matchStart = -1;
private int matchEnd = -1;
private List<byte[]> matchPayload;
private final Spans[] subSpansByDoc;
// Even though the array is probably almost sorted, InPlaceMergeSorter will likely
// perform better since it has a lower overhead than TimSorter for small arrays
private final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
ArrayUtil.swap(subSpansByDoc, i, j);
}
@Override
protected int compare(int i, int j) {
return subSpansByDoc[i].doc() - subSpansByDoc[j].doc();
}
};
private SpanNearQuery query;
private boolean collectPayloads = true;
public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
this(spanNearQuery, context, acceptDocs, termContexts, true);
public NearSpansOrdered(SpanNearQuery query, List<Spans> subSpans) throws IOException {
super(query, subSpans);
this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1
}
public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
throws IOException {
if (spanNearQuery.getClauses().length < 2) {
throw new IllegalArgumentException("Less than 2 clauses: "
+ spanNearQuery);
}
this.collectPayloads = collectPayloads;
allowedSlop = spanNearQuery.getSlop();
SpanQuery[] clauses = spanNearQuery.getClauses();
subSpans = new Spans[clauses.length];
matchPayload = new LinkedList<>();
subSpansByDoc = new Spans[clauses.length];
for (int i = 0; i < clauses.length; i++) {
subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
}
query = spanNearQuery; // kept for toString() only.
}
// inherit javadocs
@Override
public int doc() { return matchDoc; }
// inherit javadocs
@Override
public int start() { return matchStart; }
// inherit javadocs
@Override
public int end() { return matchEnd; }
public Spans[] getSubSpans() {
return subSpans;
}
// TODO: Remove warning after API has been finalized
// TODO: Would be nice to be able to lazy load payloads
@Override
public Collection<byte[]> getPayload() throws IOException {
return matchPayload;
}
// TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() {
return matchPayload.isEmpty() == false;
}
@Override
public long cost() {
long minCost = Long.MAX_VALUE;
for (int i = 0; i < subSpans.length; i++) {
minCost = Math.min(minCost, subSpans[i].cost());
}
return minCost;
}
// inherit javadocs
@Override
public boolean next() throws IOException {
if (firstTime) {
firstTime = false;
for (int i = 0; i < subSpans.length; i++) {
if (! subSpans[i].next()) {
more = false;
return false;
}
}
more = true;
}
if(collectPayloads) {
matchPayload.clear();
}
return advanceAfterOrdered();
}
// inherit javadocs
@Override
public boolean skipTo(int target) throws IOException {
if (firstTime) {
firstTime = false;
for (int i = 0; i < subSpans.length; i++) {
if (! subSpans[i].skipTo(target)) {
more = false;
return false;
}
}
more = true;
} else if (more && (subSpans[0].doc() < target)) {
if (subSpans[0].skipTo(target)) {
inSameDoc = false;
} else {
more = false;
return false;
}
}
if(collectPayloads) {
matchPayload.clear();
}
return advanceAfterOrdered();
}
/** Advances the subSpans to just after an ordered match with a minimum slop
* that is smaller than the slop allowed by the SpanNearQuery.
* @return true iff there is such a match.
*/
private boolean advanceAfterOrdered() throws IOException {
while (more && (inSameDoc || toSameDoc())) {
if (stretchToOrder() && shrinkToAfterShortestMatch()) {
return true;
}
}
return false; // no more matches
}
/** Advance the subSpans to the same document */
private boolean toSameDoc() throws IOException {
sorter.sort(0, subSpansByDoc.length);
int firstIndex = 0;
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
more = false;
inSameDoc = false;
return false;
}
maxDoc = subSpansByDoc[firstIndex].doc();
if (++firstIndex == subSpansByDoc.length) {
firstIndex = 0;
}
}
for (int i = 0; i < subSpansByDoc.length; i++) {
assert (subSpansByDoc[i].doc() == maxDoc)
: " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
+ "\n at doc " + subSpansByDoc[i].doc()
+ ", but should be at " + maxDoc;
}
inSameDoc = true;
return true;
}
/** Check whether two Spans in the same document are ordered and not overlapping.
* @return false iff spans2's start position is smaller than spans1's end position
*/
static final boolean docSpansOrderedNonOverlap(Spans spans1, Spans spans2) {
assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
assert spans1.start() < spans1.end();
assert spans2.start() < spans2.end();
return spans1.end() <= spans2.start();
}
/** Like {@link #docSpansOrderedNonOverlap(Spans,Spans)}, but use the spans
* starts and ends as parameters.
*/
private static final boolean docSpansOrderedNonOverlap(int start1, int end1, int start2, int end2) {
assert start1 < end1;
assert start2 < end2;
return end1 <= start2;
}
/** Order the subSpans within the same document by advancing all later spans
* after the previous one.
*/
private boolean stretchToOrder() throws IOException {
matchDoc = subSpans[0].doc();
for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
while (! docSpansOrderedNonOverlap(subSpans[i-1], subSpans[i])) {
if (! subSpans[i].next()) {
inSameDoc = false;
more = false;
break;
} else if (matchDoc != subSpans[i].doc()) {
inSameDoc = false;
break;
@Override
int toMatchDoc() throws IOException {
subSpansToFirstStartPosition();
while (true) {
if (! stretchToOrder()) {
if (conjunction.nextDoc() == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
subSpansToFirstStartPosition();
} else {
if (shrinkToAfterShortestMatch()) {
atFirstInCurrentDoc = true;
return conjunction.docID();
}
// not a match, after shortest ordered spans, not at beginning of doc.
if (oneExhaustedInCurrentDoc) {
if (conjunction.nextDoc() == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
subSpansToFirstStartPosition();
}
}
}
return inSameDoc;
}
/**
 * Checks whether the document the conjunction is currently positioned on contains
 * an ordered match.
 * Moves every sub spans to its first start position, then alternates
 * stretchToOrder() and shrinkToAfterShortestMatch() until either a match is found
 * or one of the sub spans has no more positions in this document.
 * @return true when a match was found; atFirstInCurrentDoc is then set so that the
 *         following nextStartPosition() call reports this match without advancing.
 */
@Override
boolean twoPhaseCurrentDocMatches() throws IOException {
subSpansToFirstStartPosition();
while (true) {
if (! stretchToOrder()) {
// a sub spans ran out of positions while ordering: no (further) match in this doc
return false;
}
if (shrinkToAfterShortestMatch()) {
atFirstInCurrentDoc = true;
return true;
}
// not a match, after shortest ordered spans
if (oneExhaustedInCurrentDoc) {
return false;
}
// otherwise retry from the positions left by shrinkToAfterShortestMatch()
}
}
/**
 * Returns the start position of the next match in the current document,
 * or NO_MORE_POSITIONS when there is none.
 * When atFirstInCurrentDoc is set, a match was already computed while positioning
 * on this document; that match is returned and the flag is cleared.
 * On exhaustion both matchStart and matchEnd are set to NO_MORE_POSITIONS.
 */
@Override
public int nextStartPosition() throws IOException {
if (atFirstInCurrentDoc) {
atFirstInCurrentDoc = false;
return matchStart;
}
while (true) {
// checked on every iteration: shrinkToAfterShortestMatch() may have set the flag
if (oneExhaustedInCurrentDoc) {
matchStart = NO_MORE_POSITIONS;
matchEnd = NO_MORE_POSITIONS;
return NO_MORE_POSITIONS;
}
if (! stretchToOrder()) {
matchStart = NO_MORE_POSITIONS;
matchEnd = NO_MORE_POSITIONS;
return NO_MORE_POSITIONS;
}
if (shrinkToAfterShortestMatch()) { // may also leave oneExhaustedInCurrentDoc
return matchStart;
}
// after shortest ordered spans, or oneExhaustedInCurrentDoc
}
}
/**
 * Moves each sub spans onto its first position in the current document and
 * clears oneExhaustedInCurrentDoc.
 * The assertions state the expected precondition (no sub spans has been positioned
 * in this document yet, i.e. startPosition() is -1) and postcondition (each sub
 * spans has at least one position).
 */
private void subSpansToFirstStartPosition() throws IOException {
for (Spans spans : subSpans) {
assert spans.startPosition() == -1 : "spans="+spans;
spans.nextStartPosition();
assert spans.startPosition() != NO_MORE_POSITIONS;
}
oneExhaustedInCurrentDoc = false;
}
/** Order the subSpans within the same document by using nextStartPosition on all subSpans
* after the first as little as necessary.
* The first subSpans is never advanced here; each later one is advanced only while it
* starts before the end of its predecessor.
* @return true when the subSpans could be ordered in this way,
* otherwise at least one is exhausted in the current doc.
*/
private boolean stretchToOrder() throws IOException {
Spans prevSpans = subSpans.get(0);
assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
assert prevSpans.endPosition() != NO_MORE_POSITIONS;
for (int i = 1; i < subSpans.size(); i++) {
Spans spans = subSpans.get(i);
assert spans.startPosition() != NO_MORE_POSITIONS;
assert spans.endPosition() != NO_MORE_POSITIONS;
// the condition also holds when spans lies entirely before prevSpans
while (prevSpans.endPosition() > spans.startPosition()) { // while overlapping spans
if (spans.nextStartPosition() == NO_MORE_POSITIONS) {
return false;
}
}
prevSpans = spans;
}
return true; // all subSpans ordered and non overlapping
}
/** The subSpans are ordered in the same doc, so there is a possible match.
* Compute the slop while making the match as short as possible by advancing
* all subSpans except the last one in reverse order.
* Compute the slop while making the match as short as possible by using nextStartPosition
* on all subSpans, except the last one, in reverse order.
*/
private boolean shrinkToAfterShortestMatch() throws IOException {
matchStart = subSpans[subSpans.length - 1].start();
matchEnd = subSpans[subSpans.length - 1].end();
Set<byte[]> possibleMatchPayloads = new HashSet<>();
if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
}
protected boolean shrinkToAfterShortestMatch() throws IOException {
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
matchStart = lastSubSpans.startPosition();
matchEnd = lastSubSpans.endPosition();
Collection<byte[]> possiblePayload = null;
int matchSlop = 0;
int lastStart = matchStart;
int lastEnd = matchEnd;
for (int i = subSpans.length - 2; i >= 0; i--) {
Spans prevSpans = subSpans[i];
if (collectPayloads && prevSpans.isPayloadAvailable()) {
Collection<byte[]> payload = prevSpans.getPayload();
possiblePayload = new ArrayList<>(payload.size());
possiblePayload.addAll(payload);
}
int prevStart = prevSpans.start();
int prevEnd = prevSpans.end();
while (true) { // Advance prevSpans until after (lastStart, lastEnd)
if (! prevSpans.next()) {
inSameDoc = false;
more = false;
break; // Check remaining subSpans for final match.
} else if (matchDoc != prevSpans.doc()) {
inSameDoc = false; // The last subSpans is not advanced here.
break; // Check remaining subSpans for last match in this document.
} else {
int ppStart = prevSpans.start();
int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
if (! docSpansOrderedNonOverlap(ppStart, ppEnd, lastStart, lastEnd)) {
break; // Check remaining subSpans.
} else { // prevSpans still before (lastStart, lastEnd)
prevStart = ppStart;
prevEnd = ppEnd;
if (collectPayloads && prevSpans.isPayloadAvailable()) {
Collection<byte[]> payload = prevSpans.getPayload();
possiblePayload = new ArrayList<>(payload.size());
possiblePayload.addAll(payload);
}
}
for (int i = subSpans.size() - 2; i >= 0; i--) {
Spans prevSpans = subSpans.get(i);
int prevStart = prevSpans.startPosition();
int prevEnd = prevSpans.endPosition();
while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
oneExhaustedInCurrentDoc = true;
break; // Check remaining subSpans for match.
}
int ppStart = prevSpans.startPosition();
int ppEnd = prevSpans.endPosition();
if (ppEnd > lastStart) { // if overlapping spans
break; // Check remaining subSpans.
}
// prevSpans still before (lastStart, lastEnd)
prevStart = ppStart;
prevEnd = ppEnd;
}
if (collectPayloads && possiblePayload != null) {
possibleMatchPayloads.addAll(possiblePayload);
}
assert prevStart <= matchStart;
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
matchSlop += (matchStart - prevEnd);
}
/* Do not break on (matchSlop > allowedSlop) here to make sure
* that subSpans[0] is advanced after the match, if any.
* that on return the first subSpans has nextStartPosition called.
*/
matchStart = prevStart;
lastStart = prevStart;
lastEnd = prevEnd;
}
boolean match = matchSlop <= allowedSlop;
if(collectPayloads && match && possibleMatchPayloads.size() > 0) {
matchPayload.addAll(possibleMatchPayloads);
}
return match; // ordered and allowed slop
}
@Override
public int startPosition() {
return atFirstInCurrentDoc ? -1 : matchStart;
}
@Override
public int endPosition() {
return atFirstInCurrentDoc ? -1 : matchEnd;
}
/** Throws an UnsupportedOperationException */
@Override
public Collection<byte[]> getPayload() throws IOException {
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
}
/** Throws an UnsupportedOperationException */
@Override
public boolean isPayloadAvailable() {
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
}
@Override
public String toString() {
return getClass().getName() + "("+query.toString()+")@"+
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
return "NearSpansOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
}
}

View File

@ -0,0 +1,146 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
import java.util.Set;
/** A {@link NearSpansOrdered} that allows collecting payloads.
 * Expert:
 * Only public for subclassing. Most implementations should not need this class
 */
public class NearSpansPayloadOrdered extends NearSpansOrdered {

  /**
   * Payloads of the current match. Cleared at the start of every
   * {@link #shrinkToAfterShortestMatch()} and refilled only when that call
   * finds a match within the allowed slop.
   */
  private final List<byte[]> matchPayload;

  /**
   * Payloads gathered from the sub spans while shrinking; copied into
   * {@link #matchPayload} only when the slop check succeeds.
   * NOTE(review): a HashSet of byte[] compares arrays by identity, not by
   * content - presumably intended; confirm.
   */
  private final Set<byte[]> possibleMatchPayloads;

  /**
   * @param query the query these spans belong to, passed to the superclass
   * @param subSpans the sub spans in query order, passed to the superclass
   * @throws IOException declared because the superclass constructor is invoked
   */
  public NearSpansPayloadOrdered(SpanNearQuery query, List<Spans> subSpans)
      throws IOException {
    super(query, subSpans);
    this.matchPayload = new LinkedList<>();
    this.possibleMatchPayloads = new HashSet<>();
  }

  /** The subSpans are ordered in the same doc, so there is a possible match.
   * Compute the slop while making the match as short as possible by using nextStartPosition
   * on all subSpans, except the last one, in reverse order.
   * Also collect the payloads.
   *
   * @return true iff the accumulated slop does not exceed {@code allowedSlop}
   * @throws IOException if a sub spans fails while advancing or reading payloads
   */
  @Override
  protected boolean shrinkToAfterShortestMatch() throws IOException {
    Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
    matchStart = lastSubSpans.startPosition();
    matchEnd = lastSubSpans.endPosition();

    matchPayload.clear();
    possibleMatchPayloads.clear();
    if (lastSubSpans.isPayloadAvailable()) {
      possibleMatchPayloads.addAll(lastSubSpans.getPayload());
    }

    // Deliberately declared outside the loop, as before: a sub spans without
    // payloads re-adds the previous sub spans' payloads, which the set ignores.
    Collection<byte[]> possiblePayload = null;

    int matchSlop = 0;
    int lastStart = matchStart;
    for (int i = subSpans.size() - 2; i >= 0; i--) {
      Spans prevSpans = subSpans.get(i);
      if (prevSpans.isPayloadAvailable()) {
        // Copy: the collection returned by the sub spans may not survive its next advance.
        possiblePayload = new ArrayList<>(prevSpans.getPayload());
      }

      int prevStart = prevSpans.startPosition();
      int prevEnd = prevSpans.endPosition();
      while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
        if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
          oneExhaustedInCurrentDoc = true;
          break; // Check remaining subSpans for match.
        }
        int ppStart = prevSpans.startPosition();
        int ppEnd = prevSpans.endPosition();
        if (ppEnd > lastStart) { // if overlapping spans
          break; // Check remaining subSpans.
        }
        // prevSpans still before (lastStart, lastEnd)
        prevStart = ppStart;
        prevEnd = ppEnd;
        if (prevSpans.isPayloadAvailable()) {
          Collection<byte[]> payload = prevSpans.getPayload();
          if (possiblePayload == null) {
            possiblePayload = new ArrayList<>(payload.size());
          } else {
            possiblePayload.clear();
          }
          possiblePayload.addAll(payload);
        }
      }

      if (possiblePayload != null) {
        possibleMatchPayloads.addAll(possiblePayload);
      }

      assert prevStart <= matchStart;
      if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
        matchSlop += (matchStart - prevEnd);
      }

      /* Do not break on (matchSlop > allowedSlop) here to make sure
       * that on return the first subSpans has nextStartPosition called.
       */
      matchStart = prevStart;
      lastStart = prevStart;
    }

    boolean match = matchSlop <= allowedSlop;
    if (match && !possibleMatchPayloads.isEmpty()) {
      matchPayload.addAll(possibleMatchPayloads);
    }
    return match; // ordered and allowed slop
  }

  // TODO: Remove warning after API has been finalized
  // TODO: Would be nice to be able to lazy load payloads
  /** Return payloads when available. */
  @Override
  public Collection<byte[]> getPayload() throws IOException {
    return matchPayload;
  }

  /** Indicates whether payloads are available */
  @Override
  public boolean isPayloadAvailable() {
    return ! matchPayload.isEmpty();
  }

  @Override
  public String toString() {
    return "NearSpansPayloadOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
  }
}

View File

@ -17,253 +17,225 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
/**
* Similar to {@link NearSpansOrdered}, but for the unordered case.
*
*
* Expert:
* Only public for subclassing. Most implementations should not need this class
*/
public class NearSpansUnordered extends Spans {
private SpanNearQuery query;
public class NearSpansUnordered extends NearSpans {
private List<SpansCell> ordered = new ArrayList<>(); // spans in query order
private Spans[] subSpans;
private int slop; // from query
private List<SpansCell> subSpanCells; // in query order
private SpansCell first; // linked list of spans
private SpansCell last; // sorted by doc only
private SpanPositionQueue spanPositionQueue;
private int totalLength; // sum of current lengths
public NearSpansUnordered(SpanNearQuery query, List<Spans> subSpans)
throws IOException {
super(query, subSpans);
private CellQueue queue; // sorted queue of spans
private SpansCell max; // max element in queue
this.subSpanCells = new ArrayList<>(subSpans.size());
for (Spans subSpan : subSpans) { // sub spans in query order
this.subSpanCells.add(new SpansCell(subSpan));
}
spanPositionQueue = new SpanPositionQueue(subSpans.size());
singleCellToPositionQueue(); // -1 startPosition/endPosition also at doc -1
}
private boolean more = true; // true iff not done
private boolean firstTime = true; // true before first next()
/**
 * Seeds the position queue with the first sub span cell only and makes that
 * cell the current maximum-end cell. The cell is expected to be unpositioned
 * (doc -1, start -1).
 */
private void singleCellToPositionQueue() {
  final SpansCell firstCell = subSpanCells.get(0);
  maxEndPositionCell = firstCell;
  assert firstCell.docID() == -1;
  assert firstCell.startPosition() == -1;
  spanPositionQueue.add(firstCell);
}
private class CellQueue extends PriorityQueue<SpansCell> {
public CellQueue(int size) {
/**
 * Rebuilds the position queue from all sub span cells, moving each cell to
 * its first position. Used when all subSpanCells arrived at the same doc.
 */
private void subSpanCellsToPositionQueue() throws IOException {
  spanPositionQueue.clear();
  final int cellCount = subSpanCells.size();
  for (int i = 0; i < cellCount; i++) {
    final SpansCell cell = subSpanCells.get(i);
    assert cell.startPosition() == -1;
    cell.nextStartPosition();
    assert cell.startPosition() != NO_MORE_POSITIONS;
    spanPositionQueue.add(cell);
  }
}
/** Running sum of the span lengths (end - start) recorded by the cells; maintained by SpansCell. */
private int totalSpanLength;

/** The cell last seen with the largest end position; maintained by SpansCell. */
private SpansCell maxEndPositionCell;

/** SpansCell wraps a sub Spans to maintain totalSpanLength and maxEndPositionCell */
private class SpansCell extends FilterSpans {
  /** Length most recently added to totalSpanLength by this cell, or -1 when none was added yet. */
  private int spanLength = -1;

  public SpansCell(Spans spans) {
    super(spans);
  }

  /** Advances the wrapped spans and updates the enclosing bookkeeping. */
  @Override
  public int nextStartPosition() throws IOException {
    final int start = in.nextStartPosition();
    final boolean positioned = start != NO_MORE_POSITIONS;
    if (positioned) {
      adjustLength();
    }
    // also after last end position in current doc.
    adjustMax();
    return start;
  }

  /** Replaces this cell's previous contribution to totalSpanLength by its current span length. */
  private void adjustLength() {
    final boolean hadLength = spanLength != -1;
    if (hadLength) {
      // subtract old, possibly from a previous doc
      totalSpanLength -= spanLength;
    }
    assert in.startPosition() != NO_MORE_POSITIONS;
    spanLength = endPosition() - startPosition();
    assert spanLength >= 0;
    // add new
    totalSpanLength += spanLength;
  }

  /** Makes this cell the maximum-end cell when it ends after the current one. */
  private void adjustMax() {
    assert docID() == maxEndPositionCell.docID();
    final boolean endsLater = endPosition() > maxEndPositionCell.endPosition();
    if (endsLater) {
      maxEndPositionCell = this;
    }
  }

  @Override
  public String toString() {
    return "NearSpansUnordered.SpansCell(" + in.toString() + ")";
  }
}
private static class SpanPositionQueue extends PriorityQueue<SpansCell> {
public SpanPositionQueue(int size) {
super(size);
}
@Override
protected final boolean lessThan(SpansCell spans1, SpansCell spans2) {
if (spans1.doc() == spans2.doc()) {
return docSpansOrdered(spans1, spans2);
} else {
return spans1.doc() < spans2.doc();
}
return positionsOrdered(spans1, spans2);
}
}
/**
 * Wraps a Spans, and can be used to form a linked list.
 * Every move (next/skipTo) goes through {@link #adjust(boolean)}, which keeps
 * the enclosing instance's totalLength, max and more fields up to date.
 */
private class SpansCell extends Spans {
// the wrapped sub spans
private Spans spans;
// successor in the enclosing linked list, or null at the tail
private SpansCell next;
// length last added to totalLength by this cell; -1 until the first successful move
private int length = -1;
// index given at construction; only used by toString()
private int index;
public SpansCell(Spans spans, int index) {
this.spans = spans;
this.index = index;
}
@Override
public boolean next() throws IOException {
return adjust(spans.next());
}
@Override
public boolean skipTo(int target) throws IOException {
return adjust(spans.skipTo(target));
}
/**
 * Updates the enclosing bookkeeping after the wrapped spans moved.
 * @param condition the result of the move on the wrapped spans
 * @return condition, unchanged
 */
private boolean adjust(boolean condition) {
if (length != -1) {
totalLength -= length; // subtract old length
}
// NOTE(review): length is not reset when condition is false, so a further call
// would subtract it again - presumably never happens once more is false; confirm.
if (condition) {
length = end() - start();
totalLength += length; // add new length
// this cell becomes max when it is at a later doc, or at the same doc with a later end
if (max == null || doc() > max.doc()
|| (doc() == max.doc()) && (end() > max.end())) {
max = this;
}
}
more = condition;
return condition;
}
@Override
public int doc() { return spans.doc(); }
@Override
public int start() { return spans.start(); }
@Override
public int end() { return spans.end(); }
// TODO: Remove warning after API has been finalized
/** Returns a fresh ArrayList copy of the wrapped spans' payload. */
@Override
public Collection<byte[]> getPayload() throws IOException {
return new ArrayList<>(spans.getPayload());
}
// TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() throws IOException {
return spans.isPayloadAvailable();
}
@Override
public long cost() {
return spans.cost();
}
/** The wrapped spans' string form followed by '#' and this cell's index. */
@Override
public String toString() { return spans.toString() + "#" + index; }
}
/**
 * Creates one SpansCell per clause of the query, in clause order, and
 * records the wrapped sub spans.
 */
public NearSpansUnordered(SpanNearQuery query, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts)
    throws IOException {
  this.query = query;
  this.slop = query.getSlop();

  final SpanQuery[] clauses = query.getClauses();
  final int clauseCount = clauses.length;
  queue = new CellQueue(clauseCount);
  subSpans = new Spans[clauseCount];

  int index = 0;
  for (SpanQuery clause : clauses) {
    final Spans clauseSpans = clause.getSpans(context, acceptDocs, termContexts);
    final SpansCell cell = new SpansCell(clauseSpans, index);
    ordered.add(cell);
    subSpans[index] = cell.spans;
    index++;
  }
}
/** Returns the internal sub spans array itself, not a copy. */
public Spans[] getSubSpans() {
return subSpans;
}
/**
 * Moves to the next match.
 * On the first call the linked list and the queue are initialized; on later
 * calls the minimum cell is advanced. The loop then skips lagging cells to a
 * common doc and tests atMatch(), advancing the minimum cell after each miss.
 * @return true iff a match was found; false once any sub spans is exhausted
 */
@Override
public boolean next() throws IOException {
if (firstTime) {
initList(true);
listToQueue(); // initialize queue
firstTime = false;
} else if (more) {
if (min().next()) { // trigger further scanning
queue.updateTop(); // maintain queue
} else {
more = false;
}
}
while (more) {
// set when the list has been changed and the queue must be rebuilt from it
boolean queueStale = false;
if (min().doc() != max.doc()) { // maintain list
queueToList();
queueStale = true;
}
// skip to doc w/ all clauses
while (more && first.doc() < last.doc()) {
more = first.skipTo(last.doc()); // skip first upto last
firstToLast(); // and move it to the end
queueStale = true;
}
if (!more) return false;
// found doc w/ all clauses
if (queueStale) { // maintain the queue
listToQueue();
queueStale = false;
}
if (atMatch()) {
return true;
}
// no match here: advance the minimum cell and try again
more = min().next();
if (more) {
queue.updateTop(); // maintain queue
}
}
return false; // no more matches
}
/**
 * Moves to the first match at a doc that is not before target.
 * On first use every cell is skipped to target and the queue is built; later
 * only the minimum cell is skipped, repeatedly, until it reaches target.
 * @param target the doc to skip to
 * @return true iff a match was found, either at the position reached or by a following next()
 */
@Override
public boolean skipTo(int target) throws IOException {
if (firstTime) { // initialize
initList(false);
for (SpansCell cell = first; more && cell!=null; cell=cell.next) {
more = cell.skipTo(target); // skip all
}
if (more) {
listToQueue();
}
firstTime = false;
} else { // normal case
while (more && min().doc() < target) { // skip as needed
if (min().skipTo(target)) {
queue.updateTop();
} else {
more = false;
}
}
}
// accept the current position when it already matches, otherwise scan forward
return more && (atMatch() || next());
}
/** Check whether two Spans in the same document are ordered with possible overlap.
* @return true iff spans1 starts before spans2
* or the spans start at the same position,
* and spans1 ends before spans2.
*/
static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
int start1 = spans1.start();
int start2 = spans2.start();
return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
static final boolean positionsOrdered(Spans spans1, Spans spans2) {
assert spans1.docID() == spans2.docID() : "doc1 " + spans1.docID() + " != doc2 " + spans2.docID();
int start1 = spans1.startPosition();
int start2 = spans2.startPosition();
return (start1 == start2) ? (spans1.endPosition() < spans2.endPosition()) : (start1 < start2);
}
/** Returns the cell at the top of the queue. */
private SpansCell min() { return queue.top(); }
/** Returns the cell at the top of the position queue. */
private SpansCell minPositionCell() {
return spanPositionQueue.top();
}
/**
 * Checks whether the current positions of the cells form a match: the distance
 * from the minimum start to the maximum end, minus the total span length,
 * may not exceed the allowed slop.
 */
private boolean atMatch() {
  final SpansCell minCell = minPositionCell();
  assert minCell.docID() == maxEndPositionCell.docID();
  final int slopNeeded =
      maxEndPositionCell.endPosition() - minCell.startPosition() - totalSpanLength;
  return slopNeeded <= allowedSlop;
}
@Override
public int doc() { return min().doc(); }
@Override
public int start() { return min().start(); }
@Override
public int end() { return max.end(); }
/**
 * Starting at a doc with all subSpans, moves forward to the first doc that
 * contains a match.
 * @return the doc of the conjunction at the match, or NO_MORE_DOCS when the conjunction is exhausted
 */
int toMatchDoc() throws IOException {
  // at doc with all subSpans
  subSpanCellsToPositionQueue();
  while (!atMatch()) {
    assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
    if (minPositionCell().nextStartPosition() == NO_MORE_POSITIONS) {
      // exhausted a subSpan in current doc
      if (conjunction.nextDoc() == NO_MORE_DOCS) {
        return NO_MORE_DOCS;
      }
      // at doc with all subSpans
      subSpanCellsToPositionQueue();
    } else {
      spanPositionQueue.updateTop();
    }
  }
  atFirstInCurrentDoc = true;
  oneExhaustedInCurrentDoc = false;
  return conjunction.docID();
}
/**
 * Checks whether the current doc, at which all subSpans are positioned,
 * contains a match.
 * @return true iff a match was found before any sub span ran out of positions in this doc
 */
@Override
boolean twoPhaseCurrentDocMatches() throws IOException {
  // at doc with all subSpans
  subSpanCellsToPositionQueue();
  while (!atMatch()) {
    assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
    if (minPositionCell().nextStartPosition() == NO_MORE_POSITIONS) {
      // exhausted a subSpan in current doc
      return false;
    }
    spanPositionQueue.updateTop();
  }
  atFirstInCurrentDoc = true;
  oneExhaustedInCurrentDoc = false;
  return true;
}
/**
 * Moves to the next match in the current doc.
 * @return the start of the match, or NO_MORE_POSITIONS once a sub span has no more positions in this doc
 */
@Override
public int nextStartPosition() throws IOException {
  if (atFirstInCurrentDoc) {
    // the first match was already found; just hand it out
    atFirstInCurrentDoc = false;
    return minPositionCell().startPosition();
  }
  // initially at current doc: move every unpositioned cell to its first position
  for (SpansCell top = minPositionCell(); top.startPosition() == -1; top = minPositionCell()) {
    top.nextStartPosition();
    spanPositionQueue.updateTop();
  }
  assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
  do {
    if (minPositionCell().nextStartPosition() == NO_MORE_POSITIONS) {
      oneExhaustedInCurrentDoc = true;
      return NO_MORE_POSITIONS;
    }
    spanPositionQueue.updateTop();
  } while (!atMatch());
  return minPositionCell().startPosition();
}
/**
 * Returns -1 while the first match of the current doc has not been handed out,
 * NO_MORE_POSITIONS once a sub span is exhausted in the current doc, and
 * otherwise the start of the minimum position cell.
 */
@Override
public int startPosition() {
  assert minPositionCell() != null;
  if (atFirstInCurrentDoc) {
    return -1;
  }
  if (oneExhaustedInCurrentDoc) {
    return NO_MORE_POSITIONS;
  }
  return minPositionCell().startPosition();
}
/**
 * Returns -1 while the first match of the current doc has not been handed out,
 * NO_MORE_POSITIONS once a sub span is exhausted in the current doc, and
 * otherwise the end of the maximum end position cell.
 */
@Override
public int endPosition() {
  if (atFirstInCurrentDoc) {
    return -1;
  }
  if (oneExhaustedInCurrentDoc) {
    return NO_MORE_POSITIONS;
  }
  return maxEndPositionCell.endPosition();
}
// TODO: Remove warning after API has been finalized
/**
* WARNING: The List is not necessarily in order of the the positions
* WARNING: The List is not necessarily in order of the positions.
* @return Collection of <code>byte[]</code> payloads
* @throws IOException if there is a low-level I/O error
*/
@Override
public Collection<byte[]> getPayload() throws IOException {
Set<byte[]> matchPayload = new HashSet<>();
for (SpansCell cell = first; cell != null; cell = cell.next) {
for (SpansCell cell : subSpanCells) {
if (cell.isPayloadAvailable()) {
matchPayload.addAll(cell.getPayload());
}
@ -271,78 +243,23 @@ public class NearSpansUnordered extends Spans {
return matchPayload;
}
// TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() throws IOException {
SpansCell pointer = min();
while (pointer != null) {
if (pointer.isPayloadAvailable()) {
for (SpansCell cell : subSpanCells) {
if (cell.isPayloadAvailable()) {
return true;
}
pointer = pointer.next;
}
return false;
}
/** Returns the smallest cost among the sub spans, or Long.MAX_VALUE when there are none. */
@Override
public long cost() {
  long cheapest = Long.MAX_VALUE;
  for (Spans sub : subSpans) {
    cheapest = Math.min(cheapest, sub.cost());
  }
  return cheapest;
}
@Override
public String toString() {
return getClass().getName() + "("+query.toString()+")@"+
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
}
private void initList(boolean next) throws IOException {
for (int i = 0; more && i < ordered.size(); i++) {
SpansCell cell = ordered.get(i);
if (next)
more = cell.next(); // move to first entry
if (more) {
addToList(cell); // add to list
}
if (minPositionCell() != null) {
return getClass().getName() + "("+query.toString()+")@"+
(docID()+":"+startPosition()+"-"+endPosition());
} else {
return getClass().getName() + "("+query.toString()+")@ ?START?";
}
}
/** Appends the cell at the tail of the linked list; it becomes the head as well when the list is empty. */
private void addToList(SpansCell cell) {
  if (last == null) {
    first = cell;
  } else {
    // add next to end of list
    last.next = cell;
  }
  cell.next = null;
  last = cell;
}
/**
 * Rotates the linked list by one: the head cell becomes the new tail.
 * The statement order matters: the old head is linked behind the tail
 * before first is advanced, and the new tail's next is cleared last.
 */
private void firstToLast() {
last.next = first; // move first to end of list
last = first;
first = first.next;
last.next = null;
}
/** Empties the queue into a freshly started linked list, in the order the queue pops its cells. */
private void queueToList() {
  first = null;
  last = null;
  while (queue.top() != null) {
    final SpansCell popped = queue.pop();
    addToList(popped);
  }
}
/** Rebuilds the queue from the cells of the linked list. */
private void listToQueue() {
  queue.clear();
  SpansCell cell = first;
  while (cell != null) {
    // add to queue from list
    queue.add(cell);
    cell = cell.next;
  }
}
/**
 * Checks whether the minimum and maximum cells are at the same doc and the
 * distance between them, minus the total length of the spans, fits in the slop.
 */
private boolean atMatch() {
  if (min().doc() != max.doc()) {
    return false;
  }
  final int slopNeeded = max.end() - min().start() - totalLength;
  return slopNeeded <= slop;
}
}

View File

@ -21,9 +21,9 @@ import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
/**
/**
* Matches spans near the beginning of a field.
* <p>
* <p>
* This class is a simple extension of {@link SpanPositionRangeQuery} in that it assumes the
* start to be zero and only checks the end boundary.
*/
@ -37,10 +37,10 @@ public class SpanFirstQuery extends SpanPositionRangeQuery {
@Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
assert spans.start() != spans.end() : "start equals end: " + spans.start();
if (spans.start() >= end)
return AcceptStatus.NO_AND_ADVANCE;
else if (spans.end() <= end)
assert spans.startPosition() != spans.endPosition() : "start equals end: " + spans.startPosition();
if (spans.startPosition() >= end)
return AcceptStatus.NO_MORE_IN_CURRENT_DOC;
else if (spans.endPosition() <= end)
return AcceptStatus.YES;
else
return AcceptStatus.NO;

View File

@ -105,7 +105,7 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {
@Override
public int hashCode() {
int h = match.hashCode();
int h = match.hashCode() ^ getClass().hashCode();
h ^= (h << 8) | (h >>> 25); // reversible
//TODO: is this right?
h ^= payloadToMatch.hashCode();

View File

@ -37,7 +37,8 @@ import org.apache.lucene.util.ToStringUtils;
/** Matches spans which are near one another. One can specify <i>slop</i>, the
* maximum number of intervening unmatched positions, as well as whether
* matches are required to be in-order. */
* matches are required to be in-order.
*/
public class SpanNearQuery extends SpanQuery implements Cloneable {
protected List<SpanQuery> clauses;
protected int slop;
@ -53,22 +54,19 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
* must be in the same order as in <code>clauses</code> and must be non-overlapping.
* <br>When <code>inOrder</code> is false, the spans from each clause
* need not be ordered and may overlap.
* @param clauses the clauses to find near each other
* @param clauses the clauses to find near each other, in the same field, at least 2.
* @param slop The slop value
* @param inOrder true if order is important
*/
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
this(clauses, slop, inOrder, true);
this(clauses, slop, inOrder, true);
}
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder, boolean collectPayloads) {
// copy clauses array into an ArrayList
this.clauses = new ArrayList<>(clauses.length);
for (int i = 0; i < clauses.length; i++) {
SpanQuery clause = clauses[i];
if (field == null) { // check field
field = clause.getField();
public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder, boolean collectPayloads) {
this.clauses = new ArrayList<>(clausesIn.length);
for (SpanQuery clause : clausesIn) {
if (this.field == null) { // check field
this.field = clause.getField();
} else if (clause.getField() != null && !clause.getField().equals(field)) {
throw new IllegalArgumentException("Clauses must have same field.");
}
@ -92,14 +90,13 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
@Override
public String getField() { return field; }
@Override
public void extractTerms(Set<Term> terms) {
for (final SpanQuery clause : clauses) {
clause.extractTerms(terms);
}
}
}
@Override
public String toString(String field) {
@ -124,15 +121,21 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
@Override
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
if (clauses.size() == 0) // optimize 0-clause case
return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
if (clauses.size() == 1) // optimize 1-clause case
return clauses.get(0).getSpans(context, acceptDocs, termContexts);
return inOrder
? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
: (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
for (SpanQuery seq : clauses) {
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
if (subSpan != null) {
subSpans.add(subSpan);
} else {
return null; // all required
}
}
// all NearSpans require at least two subSpans
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
: collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
: new NearSpansOrdered(this, subSpans);
}
@Override
@ -148,12 +151,12 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
}
}
if (clone != null) {
return clone; // some clauses rewrote
return clone; // some clauses rewrote
} else {
return this; // no clauses rewrote
return this; // no clauses rewrote
}
}
@Override
public SpanNearQuery clone() {
int sz = clauses.size();

View File

@ -30,9 +30,11 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.Objects;
/** Removes matches which overlap with another SpanQuery or
* within a x tokens before or y tokens after another SpanQuery. */
/** Removes matches which overlap with another SpanQuery or which are
* within x tokens before or y tokens after another SpanQuery.
*/
public class SpanNotQuery extends SpanQuery implements Cloneable {
private SpanQuery include;
private SpanQuery exclude;
@ -45,20 +47,20 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
this(include, exclude, 0, 0);
}
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code> within
* have no overlap with spans from <code>exclude</code> within
* <code>dist</code> tokens of <code>include</code>. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
this(include, exclude, dist, dist);
}
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code> within
* have no overlap with spans from <code>exclude</code> within
* <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
this.include = include;
this.exclude = exclude;
this.include = Objects.requireNonNull(include);
this.exclude = Objects.requireNonNull(exclude);
this.pre = (pre >=0) ? pre : 0;
this.post = (post >= 0) ? post : 0;
@ -96,81 +98,153 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
@Override
public SpanNotQuery clone() {
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
(SpanQuery) exclude.clone(), pre, post);
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery) include.clone(),
(SpanQuery) exclude.clone(), pre, post);
spanNotQuery.setBoost(getBoost());
return spanNotQuery;
return spanNotQuery;
}
@Override
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
if (includeSpans == null) {
return null;
}
Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
if (excludeSpans == null) {
return includeSpans;
}
return new Spans() {
private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
private boolean moreInclude = true;
private boolean moreInclude = true;
private int includeStart = -1;
private int includeEnd = -1;
private boolean atFirstInCurrentDoc = false;
private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
private boolean moreExclude = excludeSpans.next();
private boolean moreExclude = excludeSpans.nextDoc() != NO_MORE_DOCS;
private int excludeStart = moreExclude ? excludeSpans.nextStartPosition() : NO_MORE_POSITIONS;
@Override
public boolean next() throws IOException {
if (moreInclude) // move to next include
moreInclude = includeSpans.next();
while (moreInclude && moreExclude) {
/**
 * Moves the include spans to their next doc and first position, then forward
 * to the next included match.
 * @return the doc of the include spans, or NO_MORE_DOCS when they are exhausted
 */
@Override
public int nextDoc() throws IOException {
  if (moreInclude) {
    if (includeSpans.nextDoc() == NO_MORE_DOCS) {
      moreInclude = false;
    } else {
      atFirstInCurrentDoc = true;
      includeStart = includeSpans.nextStartPosition();
      assert includeStart != NO_MORE_POSITIONS;
    }
  }
  toNextIncluded();
  if (!moreInclude) {
    return NO_MORE_DOCS;
  }
  return includeSpans.docID();
}
if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
moreExclude = excludeSpans.skipTo(includeSpans.doc());
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
&& excludeSpans.end() <= includeSpans.start() - pre) {
moreExclude = excludeSpans.next(); // increment exclude
private void toNextIncluded() throws IOException {
while (moreInclude && moreExclude) {
if (includeSpans.docID() > excludeSpans.docID()) {
moreExclude = excludeSpans.advance(includeSpans.docID()) != NO_MORE_DOCS;
if (moreExclude) {
excludeStart = -1; // only use exclude positions at same doc
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
|| includeSpans.end()+post <= excludeSpans.start())
break; // we found a match
moreInclude = includeSpans.next(); // intersected: keep scanning
}
return moreInclude;
}
@Override
public boolean skipTo(int target) throws IOException {
if (moreInclude) // skip include
moreInclude = includeSpans.skipTo(target);
if (!moreInclude)
return false;
if (moreExclude // skip exclude
&& includeSpans.doc() > excludeSpans.doc())
moreExclude = excludeSpans.skipTo(includeSpans.doc());
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
&& excludeSpans.end() <= includeSpans.start()-pre) {
moreExclude = excludeSpans.next(); // increment exclude
if (excludeForwardInCurrentDocAndAtMatch()) {
break; // at match.
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
|| includeSpans.end()+post <= excludeSpans.start())
return true; // we found a match
// else intersected: keep scanning, to next doc if needed
includeStart = includeSpans.nextStartPosition();
if (includeStart == NO_MORE_POSITIONS) {
moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS;
if (moreInclude) {
atFirstInCurrentDoc = true;
includeStart = includeSpans.nextStartPosition();
assert includeStart != NO_MORE_POSITIONS;
}
}
}
}
return next(); // scan to next match
/**
 * Moves the exclude spans forward within the current doc as far as needed and
 * reports whether the current include span is a match, i.e. is not excluded.
 *
 * @return true when there is no exclude span left, the exclude spans are at
 *         another doc, or no exclude span lies within pre positions before
 *         or post positions after the current include span
 * @throws IOException if the exclude spans fail while advancing
 */
private boolean excludeForwardInCurrentDocAndAtMatch() throws IOException {
  assert moreInclude;
  assert includeStart != NO_MORE_POSITIONS;
  if (! moreExclude) {
    return true;
  }
  if (includeSpans.docID() != excludeSpans.docID()) {
    return true;
  }
  // at same doc
  if (excludeStart == -1) { // init exclude start position if needed
    excludeStart = excludeSpans.nextStartPosition();
    assert excludeStart != NO_MORE_POSITIONS;
  }
  // includeStart and pre are both non-negative, so this subtraction cannot overflow.
  while (excludeSpans.endPosition() <= includeStart - pre) {
    // exclude end position is before a possible exclusion
    excludeStart = excludeSpans.nextStartPosition();
    if (excludeStart == NO_MORE_POSITIONS) {
      return true; // no more exclude at current doc.
    }
  }
  // exclude end position far enough in current doc, check start position.
  // Add in long: with a large post the int sum would wrap around to a negative
  // value and wrongly report a match.
  long includeEndWithPost = (long) includeSpans.endPosition() + post;
  return includeEndWithPost <= excludeStart;
}
/**
 * Advances the include spans to target and their first position there, then
 * forward to the next included match.
 * @return the doc of the include spans, or NO_MORE_DOCS when they are exhausted
 */
@Override
public int advance(int target) throws IOException {
  if (moreInclude) {
    assert target > includeSpans.docID() : "target="+target+", includeSpans.docID()="+includeSpans.docID();
    if (includeSpans.advance(target) == NO_MORE_DOCS) {
      moreInclude = false;
    } else {
      atFirstInCurrentDoc = true;
      includeStart = includeSpans.nextStartPosition();
      assert includeStart != NO_MORE_POSITIONS;
    }
  }
  toNextIncluded();
  if (!moreInclude) {
    return NO_MORE_DOCS;
  }
  return includeSpans.docID();
}
/** Returns the current doc of the include spans. */
@Override
public int docID() {
  return includeSpans.docID();
}
@Override
public int nextStartPosition() throws IOException {
assert moreInclude;
if (atFirstInCurrentDoc) {
atFirstInCurrentDoc = false;
assert includeStart != NO_MORE_POSITIONS;
return includeStart;
}
@Override
public int doc() { return includeSpans.doc(); }
@Override
public int start() { return includeSpans.start(); }
@Override
public int end() { return includeSpans.end(); }
includeStart = includeSpans.nextStartPosition();
while ((includeStart != NO_MORE_POSITIONS)
&& (! excludeForwardInCurrentDocAndAtMatch()))
{
includeStart = includeSpans.nextStartPosition();
}
return includeStart;
}
/** Returns -1 while the first match of the current doc has not been handed out, otherwise the include start. */
@Override
public int startPosition() {
  assert includeStart == includeSpans.startPosition();
  if (atFirstInCurrentDoc) {
    return -1;
  }
  return includeStart;
}
/** Returns -1 while the first match of the current doc has not been handed out, otherwise the include spans' end. */
@Override
public int endPosition() {
  if (atFirstInCurrentDoc) {
    return -1;
  }
  return includeSpans.endPosition();
}
// TODO: Remove warning after API has been finalized
@Override
public Collection<byte[]> getPayload() throws IOException {
ArrayList<byte[]> result = null;
@ -180,7 +254,6 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
return result;
}
// TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() throws IOException {
return includeSpans.isPayloadAvailable();
@ -193,10 +266,9 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
@Override
public String toString() {
return "spans(" + SpanNotQuery.this.toString() + ")";
}
};
return "spans(" + SpanNotQuery.this.toString() + ")";
}
};
}
@Override
@ -230,7 +302,7 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
SpanNotQuery other = (SpanNotQuery)o;
return this.include.equals(other.include)
&& this.exclude.equals(other.exclude)
&& this.pre == other.pre
&& this.pre == other.pre
&& this.post == other.post;
}

View File

@ -35,18 +35,19 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.Query;
/** Matches the union of its clauses.*/
/** Matches the union of its clauses.
*/
public class SpanOrQuery extends SpanQuery implements Cloneable {
private List<SpanQuery> clauses;
private String field;
/** Construct a SpanOrQuery merging the provided clauses. */
/** Construct a SpanOrQuery merging the provided clauses.
* All clauses must have the same field.
*/
public SpanOrQuery(SpanQuery... clauses) {
// copy clauses array into an ArrayList
this.clauses = new ArrayList<>(clauses.length);
for (int i = 0; i < clauses.length; i++) {
addClause(clauses[i]);
for (SpanQuery seq : clauses) {
addClause(seq);
}
}
@ -59,7 +60,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
}
this.clauses.add(clause);
}
/** Return the clauses whose spans are matched. */
public SpanQuery[] getClauses() {
return clauses.toArray(new SpanQuery[clauses.size()]);
@ -74,7 +75,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
clause.extractTerms(terms);
}
}
@Override
public SpanOrQuery clone() {
int sz = clauses.size();
@ -152,90 +153,120 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
@Override
protected final boolean lessThan(Spans spans1, Spans spans2) {
if (spans1.doc() == spans2.doc()) {
if (spans1.start() == spans2.start()) {
return spans1.end() < spans2.end();
if (spans1.docID() == spans2.docID()) {
if (spans1.startPosition() == spans2.startPosition()) {
return spans1.endPosition() < spans2.endPosition();
} else {
return spans1.start() < spans2.start();
return spans1.startPosition() < spans2.startPosition();
}
} else {
return spans1.doc() < spans2.doc();
return spans1.docID() < spans2.docID();
}
}
}
@Override
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
if (clauses.size() == 1) // optimize 1-clause case
return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
throws IOException {
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
for (SpanQuery seq : clauses) {
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
if (subSpan != null) {
subSpans.add(subSpan);
}
}
if (subSpans.size() == 0) {
return null;
} else if (subSpans.size() == 1) {
return subSpans.get(0);
}
SpanQueue queue = new SpanQueue(clauses.size());
for (Spans spans : subSpans) {
queue.add(spans);
}
return new Spans() {
private SpanQueue queue = null;
private long cost;
private boolean initSpanQueue(int target) throws IOException {
queue = new SpanQueue(clauses.size());
Iterator<SpanQuery> i = clauses.iterator();
while (i.hasNext()) {
Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
cost += spans.cost();
if ( ((target == -1) && spans.next())
|| ((target != -1) && spans.skipTo(target))) {
queue.add(spans);
}
}
return queue.size() != 0;
@Override
public int nextDoc() throws IOException {
if (queue.size() == 0) { // all done
return NO_MORE_DOCS;
}
@Override
public boolean next() throws IOException {
if (queue == null) {
return initSpanQueue(-1);
}
int currentDoc = top().docID();
if (queue.size() == 0) { // all done
return false;
}
if (currentDoc == -1) { // initially
return advance(0);
}
if (top().next()) { // move to next
do {
if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
queue.updateTop();
return true;
}
queue.pop(); // exhausted a clause
return queue.size() != 0;
}
private Spans top() { return queue.top(); }
@Override
public boolean skipTo(int target) throws IOException {
if (queue == null) {
return initSpanQueue(target);
}
boolean skipCalled = false;
while (queue.size() != 0 && top().doc() < target) {
if (top().skipTo(target)) {
queue.updateTop();
} else {
queue.pop();
} else {
queue.pop(); // exhausted a clause
if (queue.size() == 0) {
return NO_MORE_DOCS;
}
skipCalled = true;
}
if (skipCalled) {
return queue.size() != 0;
// assert queue.size() > 0;
int doc = top().docID();
if (doc > currentDoc) {
return doc;
}
} while (true);
}
/** Convenience accessor for the sub-spans that the queue currently reports as its top element. */
private Spans top() {
  final Spans head = queue.top();
  return head;
}
@Override
public int advance(int target) throws IOException {
while ((queue.size() > 0) && (top().docID() < target)) {
if (top().advance(target) != NO_MORE_DOCS) {
queue.updateTop();
} else {
queue.pop();
}
return next();
}
@Override
public int doc() { return top().doc(); }
@Override
public int start() { return top().start(); }
@Override
public int end() { return top().end(); }
return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
}
/**
 * Current document: -1 when there is no queue, the top sub-spans' document
 * while the queue is non-empty, and NO_MORE_DOCS once the queue has drained.
 */
@Override
public int docID() {
  if (queue == null) {
    return -1;
  }
  if (queue.size() > 0) {
    return top().docID();
  }
  return NO_MORE_DOCS;
}
/**
 * Moves the top sub-spans to its next start position and restores queue order,
 * repeating for as long as the new top still reports a start position of -1.
 *
 * @return the start position of the sub-spans that ends up on top of the queue
 */
@Override
public int nextStartPosition() throws IOException {
  int current;
  do {
    top().nextStartPosition();
    queue.updateTop();
    current = top().startPosition();
  } while (current == -1); // -1: the new top is initially at this doc
  return current;
}
/** Start position reported by the sub-spans currently on top of the queue. */
@Override
public int startPosition() {
  final Spans head = top();
  return head.startPosition();
}
/** End position reported by the sub-spans currently on top of the queue. */
@Override
public int endPosition() {
  final Spans head = top();
  return head.endPosition();
}
@Override
public Collection<byte[]> getPayload() throws IOException {
@ -257,15 +288,23 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
public String toString() {
return "spans("+SpanOrQuery.this+")@"+
((queue == null)?"START"
:(queue.size()>0?(doc()+":"+start()+"-"+end()):"END"));
}
:(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
}
private long cost = -1;
/**
 * Sum of the costs of all sub-spans. The sum is computed on the first call
 * (signalled by the {@code cost} field still being -1) and cached afterwards.
 */
@Override
public long cost() {
  if (cost != -1) {
    return cost; // already computed
  }
  cost = 0;
  for (Spans sub : subSpans) {
    cost += sub.cost();
  }
  return cost;
}
};
};
}
}

View File

@ -28,15 +28,14 @@ import java.util.Iterator;
* Only return those matches that have a specific payload at
* the given position.
* <p>
* Do not use this with an SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}. Instead, use
* {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
* Do not use this with a SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}.
* Instead, use {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
* aren't ordered by {@link org.apache.lucene.search.spans.SpanNearQuery}.
*/
public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {
protected final Collection<byte[]> payloadToMatch;
/**
*
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
* @param payloadToMatch The {@link java.util.Collection} of payloads to match
*/
@ -71,7 +70,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
}
}
return AcceptStatus.YES;
}
}
@Override
public String toString(String field) {
@ -108,7 +107,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
@Override
public int hashCode() {
int h = match.hashCode();
int h = match.hashCode() ^ getClass().hashCode();
h ^= (h << 8) | (h >>> 25); // reversible
//TODO: is this right?
h ^= payloadToMatch.hashCode();

View File

@ -25,10 +25,9 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.Objects;
/**
@ -37,9 +36,8 @@ import java.util.Set;
public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
protected SpanQuery match;
public SpanPositionCheckQuery(SpanQuery match) {
this.match = match;
this.match = Objects.requireNonNull(match);
}
/**
@ -60,42 +58,44 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
match.extractTerms(terms);
}
/**
/**
* Return value for {@link SpanPositionCheckQuery#acceptPosition(Spans)}.
*/
protected static enum AcceptStatus {
/** Indicates the match should be accepted */
YES,
/** Indicates the match should be rejected */
NO,
/**
* Indicates the match should be rejected, and the enumeration should advance
* to the next document.
/**
* Indicates the match should be rejected, and the enumeration may continue
* with the next document.
*/
NO_AND_ADVANCE
NO_MORE_IN_CURRENT_DOC
};
/**
* Implementing classes are required to return whether the current position is a match for the passed in
* "match" {@link org.apache.lucene.search.spans.SpanQuery}.
* "match" {@link SpanQuery}.
*
* This is only called if the underlying {@link org.apache.lucene.search.spans.Spans#next()} for the
* match is successful
* This is only called if the underlying last {@link Spans#nextStartPosition()} for the
* match indicated a valid start position.
*
*
* @param spans The {@link org.apache.lucene.search.spans.Spans} instance, positioned at the spot to check
* @param spans The {@link Spans} instance, positioned at the spot to check
*
* @return whether the match is accepted, rejected, or rejected and should move to the next doc.
*
* @see org.apache.lucene.search.spans.Spans#next()
* @see Spans#nextDoc()
*
*/
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
@Override
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
return new PositionCheckSpan(context, acceptDocs, termContexts);
Spans matchSpans = match.getSpans(context, acceptDocs, termContexts);
return (matchSpans == null) ? null : new PositionCheckSpans(matchSpans);
}
@ -116,79 +116,110 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
}
}
protected class PositionCheckSpan extends Spans {
private Spans spans;
protected class PositionCheckSpans extends FilterSpans {
public PositionCheckSpan(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
spans = match.getSpans(context, acceptDocs, termContexts);
private boolean atFirstInCurrentDoc = false;
private int startPos = -1;
public PositionCheckSpans(Spans matchSpans) throws IOException {
super(matchSpans);
}
@Override
public boolean next() throws IOException {
if (!spans.next())
return false;
return doNext();
public int nextDoc() throws IOException {
if (in.nextDoc() == NO_MORE_DOCS)
return NO_MORE_DOCS;
return toNextDocWithAllowedPosition();
}
@Override
public boolean skipTo(int target) throws IOException {
if (!spans.skipTo(target))
return false;
public int advance(int target) throws IOException {
if (in.advance(target) == NO_MORE_DOCS)
return NO_MORE_DOCS;
return doNext();
return toNextDocWithAllowedPosition();
}
protected boolean doNext() throws IOException {
@SuppressWarnings("fallthrough")
protected int toNextDocWithAllowedPosition() throws IOException {
startPos = in.nextStartPosition();
assert startPos != NO_MORE_POSITIONS;
for (;;) {
switch(acceptPosition(this)) {
case YES: return true;
case NO:
if (!spans.next())
return false;
break;
case NO_AND_ADVANCE:
if (!spans.skipTo(spans.doc()+1))
return false;
case YES:
atFirstInCurrentDoc = true;
return in.docID();
case NO:
startPos = in.nextStartPosition();
if (startPos != NO_MORE_POSITIONS) {
break;
}
// else fallthrough
case NO_MORE_IN_CURRENT_DOC:
if (in.nextDoc() == NO_MORE_DOCS) {
startPos = -1;
return NO_MORE_DOCS;
}
startPos = in.nextStartPosition();
assert startPos != NO_MORE_POSITIONS : "no start position at doc="+in.docID();
break;
}
}
}
@Override
public int doc() { return spans.doc(); }
@Override
public int start() { return spans.start(); }
@Override
public int end() { return spans.end(); }
// TODO: Remove warning after API has been finalized
@Override
public Collection<byte[]> getPayload() throws IOException {
ArrayList<byte[]> result = null;
if (spans.isPayloadAvailable()) {
result = new ArrayList<>(spans.getPayload());
public int nextStartPosition() throws IOException {
if (atFirstInCurrentDoc) {
atFirstInCurrentDoc = false;
return startPos;
}
return result;//TODO: any way to avoid the new construction?
}
// TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() throws IOException {
return spans.isPayloadAvailable();
for (;;) {
startPos = in.nextStartPosition();
if (startPos == NO_MORE_POSITIONS) {
return NO_MORE_POSITIONS;
}
switch(acceptPosition(this)) {
case YES:
return startPos;
case NO:
break;
case NO_MORE_IN_CURRENT_DOC:
return startPos = NO_MORE_POSITIONS; // startPos ahead for the current doc.
}
}
}
@Override
public long cost() {
return spans.cost();
public int startPosition() {
return atFirstInCurrentDoc ? -1 : startPos;
}
/**
 * End position matching {@code startPos}: -1 while {@code atFirstInCurrentDoc}
 * is set, NO_MORE_POSITIONS once {@code startPos} has reached that sentinel,
 * otherwise whatever the wrapped spans report.
 */
@Override
public int endPosition() {
  if (atFirstInCurrentDoc) {
    return -1;
  }
  if (startPos == NO_MORE_POSITIONS) {
    return NO_MORE_POSITIONS;
  }
  return in.endPosition();
}
@Override
public String toString() {
return "spans(" + SpanPositionCheckQuery.this.toString() + ")";
}
return "spans(" + SpanPositionCheckQuery.this.toString() + ")";
}
}
/**
 * Two instances are equal when they are the same object, or when they have
 * exactly the same runtime class and equal {@code match} queries.
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o == null || getClass() != o.getClass()) {
    return false;
  }
  final SpanPositionCheckQuery other = (SpanPositionCheckQuery) o;
  return match.equals(other.match);
}
/** Combines the hash of {@code match} with the hash of the runtime class. */
@Override
public int hashCode() {
  final int matchHash = match.hashCode();
  final int classHash = getClass().hashCode();
  return matchHash ^ classHash;
}
}

View File

@ -25,10 +25,10 @@ import java.io.IOException;
/**
* Checks to see if the {@link #getMatch()} lies between a start and end position
*
* @see org.apache.lucene.search.spans.SpanFirstQuery for a derivation that is optimized for the case where start position is 0
* See {@link SpanFirstQuery} for a derivation that is optimized for the case where start position is 0.
*/
public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
protected int start = 0;
protected int start;
protected int end;
public SpanPositionRangeQuery(SpanQuery match, int start, int end) {
@ -40,13 +40,12 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
@Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
assert spans.start() != spans.end();
if (spans.start() >= end)
return AcceptStatus.NO_AND_ADVANCE;
else if (spans.start() >= start && spans.end() <= end)
return AcceptStatus.YES;
else
return AcceptStatus.NO;
assert spans.startPosition() != spans.endPosition();
AcceptStatus res = (spans.startPosition() >= end)
? AcceptStatus.NO_MORE_IN_CURRENT_DOC
: (spans.startPosition() >= start && spans.endPosition() <= end)
? AcceptStatus.YES : AcceptStatus.NO;
return res;
}
@ -96,7 +95,7 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
@Override
public int hashCode() {
int h = match.hashCode();
int h = match.hashCode() ^ getClass().hashCode();
h ^= (h << 8) | (h >>> 25); // reversible
h ^= Float.floatToRawIntBits(getBoost()) ^ end ^ start;
return h;

View File

@ -25,16 +25,17 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
/** Base class for span-based queries. */
public abstract class SpanQuery extends Query {
/** Expert: Returns the matches for this query in an index. Used internally
* to search for spans. */
/** Expert: Returns the matches for this query in an index.
* Used internally to search for spans.
* This may return null to indicate that the SpanQuery has no results.
*/
public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
/**
/**
* Returns the name of the field matched by this query.
* <p>
* Note that this may return null if the query matches no terms.
@ -42,7 +43,7 @@ public abstract class SpanQuery extends Query {
public abstract String getField();
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new SpanWeight(this, searcher);
}

View File

@ -18,9 +18,9 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
/**
@ -29,58 +29,68 @@ import org.apache.lucene.search.similarities.Similarity;
public class SpanScorer extends Scorer {
protected Spans spans;
protected boolean more = true;
protected int doc;
protected float freq;
protected int numMatches;
protected final Similarity.SimScorer docScorer;
protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
throws IOException {
super(weight);
this.docScorer = docScorer;
this.spans = spans;
doc = -1;
more = spans.next();
this.docScorer = Objects.requireNonNull(docScorer);
this.spans = Objects.requireNonNull(spans);
this.doc = -1;
}
@Override
public int nextDoc() throws IOException {
if (!setFreqCurrentDoc()) {
doc = NO_MORE_DOCS;
int prevDoc = doc;
doc = spans.nextDoc();
if (doc != NO_MORE_DOCS) {
setFreqCurrentDoc();
}
return doc;
}
@Override
public int advance(int target) throws IOException {
if (!more) {
return doc = NO_MORE_DOCS;
}
if (spans.doc() < target) { // setFreqCurrentDoc() leaves spans.doc() ahead
more = spans.skipTo(target);
}
if (!setFreqCurrentDoc()) {
doc = NO_MORE_DOCS;
int prevDoc = doc;
doc = spans.advance(target);
if (doc != NO_MORE_DOCS) {
setFreqCurrentDoc();
}
return doc;
}
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
numMatches = 0;
assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans;
assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans;
int prevStartPos = -1;
int prevEndPos = -1;
int startPos = spans.nextStartPosition();
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
do {
int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength);
assert startPos >= prevStartPos;
int endPos = spans.endPosition();
assert endPos != Spans.NO_MORE_POSITIONS;
// This assertion can fail for Or spans on the same term:
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos;
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos;
numMatches++;
more = spans.next();
} while (more && (doc == spans.doc()));
int matchLength = endPos - startPos;
freq += docScorer.computeSlopFactor(matchLength);
prevStartPos = startPos;
prevEndPos = endPos;
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
return true;
}
@ -89,15 +99,16 @@ public class SpanScorer extends Scorer {
@Override
public float score() throws IOException {
return docScorer.score(doc, freq);
float s = docScorer.score(doc, freq);
return s;
}
@Override
public int freq() throws IOException {
return numMatches;
}
/** Returns the intermediate "sloppy freq" adjusted for edit distance
/** Returns the intermediate "sloppy freq" adjusted for edit distance
* @lucene.internal */
// only public so .payloads can see it.
public float sloppyFreq() throws IOException {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.Objects;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.LeafReaderContext;
@ -31,19 +32,23 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
/** Matches spans containing a term. */
/** Matches spans containing a term.
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
*/
public class SpanTermQuery extends SpanQuery {
protected Term term;
/** Construct a SpanTermQuery matching the named term's spans. */
public SpanTermQuery(Term term) { this.term = term; }
public SpanTermQuery(Term term) {
this.term = Objects.requireNonNull(term);
}
/**
 * Returns the term whose spans are matched.
 *
 * @return the term held by this query
 */
public Term getTerm() {
  return term;
}
/** Returns the field of the term held by this query. */
@Override
public String getField() {
  final String fieldName = term.field();
  return fieldName;
}
@Override
public void extractTerms(Set<Term> terms) {
terms.add(term);
@ -64,7 +69,7 @@ public class SpanTermQuery extends SpanQuery {
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result = prime * result + ((term == null) ? 0 : term.hashCode());
result = prime * result + term.hashCode();
return result;
}
@ -77,12 +82,7 @@ public class SpanTermQuery extends SpanQuery {
if (getClass() != obj.getClass())
return false;
SpanTermQuery other = (SpanTermQuery) obj;
if (term == null) {
if (other.term != null)
return false;
} else if (!term.equals(other.term))
return false;
return true;
return term.equals(other.term);
}
@Override
@ -95,7 +95,7 @@ public class SpanTermQuery extends SpanQuery {
final Terms terms = context.reader().terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term.bytes())) {
if (termsEnum.seekExact(term.bytes())) {
state = termsEnum.termState();
} else {
state = null;
@ -106,14 +106,14 @@ public class SpanTermQuery extends SpanQuery {
} else {
state = termContext.get(context.ord);
}
if (state == null) { // term is not present in that reader
return TermSpans.EMPTY_TERM_SPANS;
return null;
}
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state);
final PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.PAYLOADS);
if (postings != null) {

View File

@ -51,7 +51,7 @@ public class SpanWeight extends Weight {
super(query);
this.similarity = searcher.getSimilarity();
this.query = query;
termContexts = new HashMap<>();
TreeSet<Term> terms = new TreeSet<>();
query.extractTerms(terms);
@ -66,8 +66,8 @@ public class SpanWeight extends Weight {
}
final String field = query.getField();
if (field != null) {
stats = similarity.computeWeight(query.getBoost(),
searcher.collectionStatistics(query.getField()),
stats = similarity.computeWeight(query.getBoost(),
searcher.collectionStatistics(query.getField()),
termStats);
}
}
@ -88,9 +88,9 @@ public class SpanWeight extends Weight {
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
if (stats == null) {
return null;
} else {
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context));
}
Spans spans = query.getSpans(context, acceptDocs, termContexts);
return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context));
}
@Override
@ -106,11 +106,11 @@ public class SpanWeight extends Weight {
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
result.addDetail(scoreExplanation);
result.setValue(scoreExplanation.getValue());
result.setMatch(true);
result.setMatch(true);
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
}

View File

@ -20,54 +20,44 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Collection;
/** Expert: an enumeration of span matches. Used to implement span searching.
* Each span represents a range of term positions within a document. Matches
* are enumerated in order, by increasing document number, within that by
* increasing start position and finally by increasing end position. */
public abstract class Spans {
/** Move to the next match, returning true iff any such exists. */
public abstract boolean next() throws IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
/** Skips to the first match beyond the current, whose document number is
* greater than or equal to <i>target</i>.
* <p>The behavior of this method is <b>undefined</b> when called with
* <code> target &le; current</code>, or after the iterator has exhausted.
* Both cases may result in unpredicted behavior.
* <p>Returns true iff there is such
* a match. <p>Behaves as if written:
* <pre class="prettyprint">
* boolean skipTo(int target) {
* do {
* if (!next())
* return false;
* } while (target &gt; doc());
* return true;
* }
* </pre>
* Most implementations are considerably more efficient than that.
*/
public abstract boolean skipTo(int target) throws IOException;
/** Iterates through combinations of start/end positions per-doc.
* Each start/end position represents a range of term positions within the current document.
* These are enumerated in order, by increasing document number, within that by
* increasing start position and finally by increasing end position.
*/
public abstract class Spans extends DocIdSetIterator {
public static final int NO_MORE_POSITIONS = Integer.MAX_VALUE;
/** Returns the document number of the current match. Initially invalid. */
public abstract int doc();
/** Returns the start position of the current match. Initially invalid. */
public abstract int start();
/** Returns the end position of the current match. Initially invalid. */
public abstract int end();
/**
* Returns the payload data for the current span.
* This is invalid until {@link #next()} is called for
* the first time.
* Returns the next start position for the current doc.
* There is always at least one start/end position per doc.
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
*/
public abstract int nextStartPosition() throws IOException;
/**
* Returns the start position in the current doc, or -1 when {@link #nextStartPosition} was not yet called on the current doc.
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
*/
public abstract int startPosition();
/**
* Returns the end position for the current start position, or -1 when {@link #nextStartPosition} was not yet called on the current doc.
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
*/
public abstract int endPosition();
/**
* Returns the payload data for the current start/end position.
* This is only valid after {@link #nextStartPosition()}
* returned an available start position.
* This method must not be called more than once after each call
* of {@link #next()}. However, most payloads are loaded lazily,
* of {@link #nextStartPosition()}. However, most payloads are loaded lazily,
* so if the payload data for the current position is not needed,
* this method may not be called at all for performance reasons. An ordered
* SpanQuery does not lazy load, so if you have payloads in your index and
* you do not want ordered SpanNearQuerys to collect payloads, you can
* disable collection with a constructor option.<br>
* this method may not be called at all for performance reasons.
* <br>
* Note that the return type is a collection, thus the ordering should not be relied upon.
* <br>
@ -76,25 +66,35 @@ public abstract class Spans {
* @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
* @throws IOException if there is a low-level I/O error
*/
// TODO: Remove warning after API has been finalized
public abstract Collection<byte[]> getPayload() throws IOException;
/**
* Checks if a payload can be loaded at this position.
* Checks if a payload can be loaded at the current start/end position.
* <p>
* Payloads can only be loaded once per call to
* {@link #next()}.
* {@link #nextStartPosition()}.
*
* @return true if there is a payload available at this position that can be loaded
* @return true if there is a payload available at this start/end position
* that can be loaded
*/
public abstract boolean isPayloadAvailable() throws IOException;
/**
* Returns the estimated cost of this spans.
* <p>
* This is generally an upper bound of the number of documents this iterator
* might match, but may be a rough heuristic, hardcoded value, or otherwise
* completely inaccurate.
* Optional method: Return a {@link TwoPhaseIterator} view of this
* {@link Spans}. A return value of {@code null} indicates that
* two-phase iteration is not supported.
*
* Note that the returned {@link TwoPhaseIterator}'s
* {@link TwoPhaseIterator#approximation() approximation} must
* advance synchronously with this iterator: advancing the approximation must
* advance this iterator and vice-versa.
*
* Implementing this method is typically useful on {@link Spans}s
* that have a high per-document overhead in order to confirm matches.
*
* The default implementation returns {@code null}.
*/
public abstract long cost();
public TwoPhaseIterator asTwoPhaseIterator() {
// Base implementation offers no two-phase view and always yields null.
return null;
}
}

View File

@ -24,10 +24,12 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collections;
import java.util.Collection;
import java.util.Objects;
/**
* Expert:
* Public for extension only
* Public for extension only.
* This does not work correctly for terms that are indexed at position Integer.MAX_VALUE.
*/
public class TermSpans extends Spans {
protected final PostingsEnum postings;
@ -39,65 +41,67 @@ public class TermSpans extends Spans {
protected boolean readPayload;
public TermSpans(PostingsEnum postings, Term term) {
this.postings = postings;
this.term = term;
doc = -1;
}
// only for EmptyTermSpans (below)
TermSpans() {
term = null;
postings = null;
this.postings = Objects.requireNonNull(postings);
this.term = Objects.requireNonNull(term);
this.doc = -1;
this.position = -1;
}
@Override
public boolean next() throws IOException {
if (count == freq) {
if (postings == null) {
return false;
}
doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
public int nextDoc() throws IOException {
doc = postings.nextDoc();
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
freq = postings.freq();
assert freq >= 1;
count = 0;
}
position = postings.nextPosition();
count++;
readPayload = false;
return true;
}
@Override
public boolean skipTo(int target) throws IOException {
assert target > doc;
doc = postings.advance(target);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
freq = postings.freq();
count = 0;
position = postings.nextPosition();
count++;
readPayload = false;
return true;
}
@Override
public int doc() {
position = -1;
return doc;
}
@Override
public int start() {
public int advance(int target) throws IOException {
assert target > doc;
doc = postings.advance(target);
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
freq = postings.freq();
assert freq >= 1;
count = 0;
}
position = -1;
return doc;
}
/** Returns the current value of the {@code doc} field. */
@Override
public int docID() {
return doc;
}
/**
 * Reads the next position from the postings while {@code count} has not yet
 * reached {@code freq}; after that, parks {@code position} on NO_MORE_POSITIONS.
 *
 * @return the new value of {@code position}
 */
@Override
public int nextStartPosition() throws IOException {
  if (count != freq) {
    final int previous = position;
    position = postings.nextPosition();
    assert position >= previous : "prevPosition="+previous+" > position="+position;
    assert position != NO_MORE_POSITIONS; // int endPosition not possible
    count++;
    readPayload = false; // payload of the new position has not been read yet
    return position;
  }
  // All positions consumed; this branch must not be entered twice in a row.
  assert position != NO_MORE_POSITIONS;
  position = NO_MORE_POSITIONS;
  return position;
}
@Override
public int end() {
return position + 1;
public int startPosition() {
return position;
}
/**
 * End position derived from {@code position}: the -1 and NO_MORE_POSITIONS
 * sentinels are passed through unchanged, any other value yields position + 1.
 */
@Override
public int endPosition() {
  if (position == -1) {
    return -1;
  }
  if (position == NO_MORE_POSITIONS) {
    return NO_MORE_POSITIONS;
  }
  return position + 1;
}
@Override
@ -105,7 +109,6 @@ public class TermSpans extends Spans {
return postings.cost();
}
// TODO: Remove warning after API has been finalized
@Override
public Collection<byte[]> getPayload() throws IOException {
final BytesRef payload = postings.getPayload();
@ -120,7 +123,6 @@ public class TermSpans extends Spans {
return Collections.singletonList(bytes);
}
// TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() throws IOException {
return readPayload == false && postings.getPayload() != null;
@ -129,55 +131,12 @@ public class TermSpans extends Spans {
@Override
public String toString() {
return "spans(" + term.toString() + ")@" +
(doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position);
(doc == -1 ? "START" : (doc == NO_MORE_DOCS) ? "ENDDOC"
: doc + " - " + (position == NO_MORE_POSITIONS ? "ENDPOS" : position));
}
/** Returns the {@code postings} enumeration held by this instance. */
public PostingsEnum getPostings() {
return postings;
}
private static final class EmptyTermSpans extends TermSpans {
@Override
public boolean next() {
return false;
}
@Override
public boolean skipTo(int target) {
return false;
}
@Override
public int doc() {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int start() {
return -1;
}
@Override
public int end() {
return -1;
}
@Override
public Collection<byte[]> getPayload() {
return null;
}
@Override
public boolean isPayloadAvailable() {
return false;
}
@Override
public long cost() {
return 0;
}
}
public static final TermSpans EMPTY_TERM_SPANS = new EmptyTermSpans();
}

View File

@ -18,14 +18,18 @@
/**
* The calculus of spans.
*
* <p>A span is a <code>&lt;doc,startPosition,endPosition&gt;</code> tuple.</p>
* <p>A span is a <code>&lt;doc,startPosition,endPosition&gt;</code> tuple that is enumerated by
* class {@link org.apache.lucene.search.spans.Spans Spans}.
* </p>
*
* <p>The following span query operators are implemented:
*
* <ul>
*
* <li>A {@link org.apache.lucene.search.spans.SpanTermQuery SpanTermQuery} matches all spans
* containing a particular {@link org.apache.lucene.index.Term Term}.</li>
* containing a particular {@link org.apache.lucene.index.Term Term}.
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
* </li>
*
* <li> A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} matches spans
* which occur near one another, and can be used to implement things like

View File

@ -238,18 +238,20 @@ public class TestPositionIncrement extends LuceneTestCase {
if (VERBOSE) {
System.out.println("\ngetPayloadSpans test");
}
Spans pspans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
while (pspans.next()) {
if (VERBOSE) {
System.out.println("doc " + pspans.doc() + ": span " + pspans.start()
+ " to " + pspans.end());
}
Collection<byte[]> payloads = pspans.getPayload();
sawZero |= pspans.start() == 0;
for (byte[] bytes : payloads) {
count++;
Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
while (pspans.nextDoc() != Spans.NO_MORE_DOCS) {
while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if (VERBOSE) {
System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition()
+ " to " + pspans.endPosition());
}
Collection<byte[]> payloads = pspans.getPayload();
sawZero |= pspans.startPosition() == 0;
for (byte[] bytes : payloads) {
count++;
if (VERBOSE) {
System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
}
}
}
}
@ -257,20 +259,20 @@ public class TestPositionIncrement extends LuceneTestCase {
assertEquals(5, count);
// System.out.println("\ngetSpans test");
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
count = 0;
sawZero = false;
while (spans.next()) {
count++;
sawZero |= spans.start() == 0;
// System.out.println(spans.doc() + " - " + spans.start() + " - " +
// spans.end());
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
count++;
sawZero |= spans.startPosition() == 0;
// System.out.println(spans.doc() + " - " + spans.start() + " - " +
// spans.end());
}
}
assertEquals(4, count);
assertTrue(sawZero);
// System.out.println("\nPayloadSpanUtil test");
sawZero = false;
PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext());
Collection<byte[]> pls = psu.getPayloadsForQuery(snq);

View File

@ -160,7 +160,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
}
CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
assertTrue("spans is null and it shouldn't be", spans != null);
/*float score = hits.score(0);
for (int i =1; i < hits.length(); i++)
@ -211,13 +211,15 @@ public class TestPayloadTermQuery extends LuceneTestCase {
}
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
CheckHits.checkExplanations(query, "field", searcher, true);
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
assertTrue("spans is null and it shouldn't be", spans != null);
//should be two matches per document
int count = 0;
//100 hits times 2 matches per hit, we should have 200 in count
while (spans.next()) {
count++;
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
count++;
}
}
assertTrue(count + " does not equal: " + 200, count == 200);
}
@ -253,13 +255,15 @@ public class TestPayloadTermQuery extends LuceneTestCase {
}
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
CheckHits.checkExplanations(query, "field", searcher, true);
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
assertTrue("spans is null and it shouldn't be", spans != null);
//should be two matches per document
int count = 0;
//100 hits times 2 matches per hit, we should have 200 in count
while (spans.next()) {
count++;
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
count++;
}
}
reader.close();
}

View File

@ -24,7 +24,6 @@ import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
@ -42,27 +41,32 @@ final class JustCompileSearchSpans {
static final class JustCompileSpans extends Spans {
@Override
public int doc() {
public int docID() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int end() {
public int nextDoc() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public boolean next() {
public int advance(int target) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int startPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public boolean skipTo(int target) {
public int endPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int start() {
public int nextStartPosition() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@ -103,6 +107,36 @@ final class JustCompileSearchSpans {
static final class JustCompilePayloadSpans extends Spans {
/** Compile-only stub; always throws {@link UnsupportedOperationException}. */
@Override
public int docID() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
/** Compile-only stub; always throws {@link UnsupportedOperationException}. */
@Override
public int nextDoc() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
/** Compile-only stub; always throws {@link UnsupportedOperationException}. */
@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
/** Compile-only stub; always throws {@link UnsupportedOperationException}. */
@Override
public int startPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
/** Compile-only stub; always throws {@link UnsupportedOperationException}. */
@Override
public int endPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
/** Compile-only stub; always throws {@link UnsupportedOperationException}. */
@Override
public int nextStartPosition() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public Collection<byte[]> getPayload() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@ -113,31 +147,6 @@ final class JustCompileSearchSpans {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int doc() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int end() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public boolean next() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public boolean skipTo(int target) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int start() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public long cost() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@ -147,7 +156,7 @@ final class JustCompileSearchSpans {
static final class JustCompileSpanScorer extends SpanScorer {
protected JustCompileSpanScorer(Spans spans, Weight weight,
protected JustCompileSpanScorer(Spans spans, SpanWeight weight,
Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
}

View File

@ -18,19 +18,18 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.HashSet;
import java.util.Map;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
/**
*
@ -39,141 +38,20 @@ import org.apache.lucene.search.DocIdSetIterator;
* NOTE: This should be used for testing purposes only
* @lucene.internal
*/
public class MultiSpansWrapper extends Spans { // can't be package private due to payloads
public class MultiSpansWrapper {
private SpanQuery query;
private List<LeafReaderContext> leaves;
private int leafOrd = 0;
private Spans current;
private Map<Term,TermContext> termContexts;
private final int numLeaves;
private MultiSpansWrapper(List<LeafReaderContext> leaves, SpanQuery query, Map<Term,TermContext> termContexts) {
this.query = query;
this.leaves = leaves;
this.numLeaves = leaves.size();
this.termContexts = termContexts;
}
public static Spans wrap(IndexReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException {
LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing
LeafReaderContext lrContext = lr.getContext();
Query rewrittenQuery = spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly
HashSet<Term> termSet = new HashSet<>();
rewrittenQuery.extractTerms(termSet);
Map<Term,TermContext> termContexts = new HashMap<>();
TreeSet<Term> terms = new TreeSet<>();
query.extractTerms(terms);
for (Term term : terms) {
termContexts.put(term, TermContext.build(topLevelReaderContext, term));
for (Term term: termSet) {
TermContext termContext = TermContext.build(lrContext, term);
termContexts.put(term, termContext);
}
final List<LeafReaderContext> leaves = topLevelReaderContext.leaves();
if(leaves.size() == 1) {
final LeafReaderContext ctx = leaves.get(0);
return query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
}
return new MultiSpansWrapper(leaves, query, termContexts);
Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts);
return actSpans;
}
@Override
public boolean next() throws IOException {
if (leafOrd >= numLeaves) {
return false;
}
if (current == null) {
final LeafReaderContext ctx = leaves.get(leafOrd);
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
}
while(true) {
if (current.next()) {
return true;
}
if (++leafOrd < numLeaves) {
final LeafReaderContext ctx = leaves.get(leafOrd);
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
} else {
current = null;
break;
}
}
return false;
}
@Override
public boolean skipTo(int target) throws IOException {
if (leafOrd >= numLeaves) {
return false;
}
int subIndex = ReaderUtil.subIndex(target, leaves);
assert subIndex >= leafOrd;
if (subIndex != leafOrd) {
final LeafReaderContext ctx = leaves.get(subIndex);
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
leafOrd = subIndex;
} else if (current == null) {
final LeafReaderContext ctx = leaves.get(leafOrd);
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
}
while (true) {
if (target < leaves.get(leafOrd).docBase) {
// target was in the previous slice
if (current.next()) {
return true;
}
} else if (current.skipTo(target - leaves.get(leafOrd).docBase)) {
return true;
}
if (++leafOrd < numLeaves) {
final LeafReaderContext ctx = leaves.get(leafOrd);
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
} else {
current = null;
break;
}
}
return false;
}
@Override
public int doc() {
if (current == null) {
return DocIdSetIterator.NO_MORE_DOCS;
}
return current.doc() + leaves.get(leafOrd).docBase;
}
@Override
public int start() {
if (current == null) {
return DocIdSetIterator.NO_MORE_DOCS;
}
return current.start();
}
@Override
public int end() {
if (current == null) {
return DocIdSetIterator.NO_MORE_DOCS;
}
return current.end();
}
@Override
public Collection<byte[]> getPayload() throws IOException {
if (current == null) {
return Collections.emptyList();
}
return current.getPayload();
}
@Override
public boolean isPayloadAvailable() throws IOException {
if (current == null) {
return false;
}
return current.isPayloadAvailable();
}
@Override
public long cost() {
return Integer.MAX_VALUE; // just for tests
}
}

View File

@ -651,47 +651,6 @@ public class TestBasics extends LuceneTestCase {
1746, 1747, 1756, 1757, 1766, 1767, 1776, 1777, 1786, 1787, 1796, 1797});
}
@Test
public void testSpansSkipTo() throws Exception {
SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy"));
SpanTermQuery t2 = new SpanTermQuery(new Term("field", "seventy"));
Spans s1 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t1);
Spans s2 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t2);
assertTrue(s1.next());
assertTrue(s2.next());
boolean hasMore = true;
do {
hasMore = skipToAccordingToJavaDocs(s1, s1.doc() + 1);
assertEquals(hasMore, s2.skipTo(s2.doc() + 1));
assertEquals(s1.doc(), s2.doc());
} while (hasMore);
}
/** Skips to the first match beyond the current, whose document number is
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
* a match. <p>Behaves as if written: <pre>
* boolean skipTo(int target) {
* do {
* if (!next())
* return false;
* } while (target &gt; doc());
* return true;
* }
* </pre>
*/
private boolean skipToAccordingToJavaDocs(Spans s, int target)
throws Exception {
do {
if (!s.next())
return false;
} while (target > s.doc());
return true;
}
/**
 * Convenience wrapper that forwards to {@code CheckHits.checkHits} using this
 * test's {@code random()}, the default field name {@code "field"} and this
 * test's {@code searcher}.
 *
 * @param query the query to run
 * @param results the values passed through to {@code CheckHits.checkHits}
 *        as its expected results argument
 * @throws IOException if the underlying check fails with an I/O error
 */
private void checkHits(Query query, int[] results) throws IOException {
CheckHits.checkHits(random(), query, "field", searcher, results);
}

View File

@ -258,37 +258,19 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
SpanQuery q2 = new SpanTermQuery(new Term("first", "james"));
SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender"));
check(q, new int[] { 0, 1, 2, 3, 4 });
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(true, span.next());
assertEquals(s(0,0,1), s(span));
assertEquals(true, span.next());
assertEquals(s(1,0,1), s(span));
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
assertEquals(true, span.next());
assertEquals(s(1,1,2), s(span));
assertEquals(true, span.next());
assertEquals(s(2,0,1), s(span));
assertEquals(true, span.next());
assertEquals(s(2,1,2), s(span));
assertEquals(true, span.next());
assertEquals(s(2,2,3), s(span));
assertEquals(true, span.next());
assertEquals(s(3,0,1), s(span));
assertEquals(true, span.next());
assertEquals(s(4,0,1), s(span));
assertEquals(true, span.next());
assertEquals(s(4,1,2), s(span));
assertEquals(false, span.next());
TestSpans.tstNextSpans(span, 0,0,1);
TestSpans.tstNextSpans(span, 1,0,1);
TestSpans.tstNextSpans(span, 1,1,2);
TestSpans.tstNextSpans(span, 2,0,1);
TestSpans.tstNextSpans(span, 2,1,2);
TestSpans.tstNextSpans(span, 2,2,3);
TestSpans.tstNextSpans(span, 3,0,1);
TestSpans.tstNextSpans(span, 4,0,1);
TestSpans.tstNextSpans(span, 4,1,2);
TestSpans.tstEndSpans(span);
}
public void testSpans1() throws Exception {
@ -300,19 +282,22 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
check(qA, new int[] { 0, 1, 2, 4 });
check(qB, new int[] { 0, 1, 2, 4 });
Spans spanA = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qA);
Spans spanB = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qB);
Spans spanA = MultiSpansWrapper.wrap(searcher.getIndexReader(), qA);
Spans spanB = MultiSpansWrapper.wrap(searcher.getIndexReader(), qB);
while (spanA.next()) {
assertTrue("spanB not still going", spanB.next());
assertEquals("spanA not equal spanB", s(spanA), s(spanB));
while (spanA.nextDoc() != Spans.NO_MORE_DOCS) {
assertNotSame("spanB not still going", Spans.NO_MORE_DOCS, spanB.nextDoc());
while (spanA.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
assertEquals("spanB start position", spanA.startPosition(), spanB.nextStartPosition());
assertEquals("spanB end position", spanA.endPosition(), spanB.endPosition());
}
assertEquals("spanB start position", Spans.NO_MORE_POSITIONS, spanB.nextStartPosition());
}
assertTrue("spanB still going even tough spanA is done", !(spanB.next()));
assertEquals("spanB end doc", Spans.NO_MORE_DOCS, spanB.nextDoc());
}
public void testSpans2() throws Exception {
assumeTrue("Broken scoring: LUCENE-3723",
assumeTrue("Broken scoring: LUCENE-3723",
searcher.getSimilarity() instanceof TFIDFSimilarity);
SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female"));
SpanQuery qA2 = new SpanTermQuery(new Term("first", "james"));
@ -322,30 +307,17 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
{ new FieldMaskingSpanQuery(qA, "id"),
new FieldMaskingSpanQuery(qB, "id") }, -1, false );
check(q, new int[] { 0, 1, 2, 3 });
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(true, span.next());
assertEquals(s(0,0,1), s(span));
assertEquals(true, span.next());
assertEquals(s(1,1,2), s(span));
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
assertEquals(true, span.next());
assertEquals(s(2,0,1), s(span));
assertEquals(true, span.next());
assertEquals(s(2,2,3), s(span));
assertEquals(true, span.next());
assertEquals(s(3,0,1), s(span));
assertEquals(false, span.next());
TestSpans.tstNextSpans(span, 0,0,1);
TestSpans.tstNextSpans(span, 1,1,2);
TestSpans.tstNextSpans(span, 2,0,1);
TestSpans.tstNextSpans(span, 2,2,3);
TestSpans.tstNextSpans(span, 3,0,1);
TestSpans.tstEndSpans(span);
}
public String s(Spans span) {
return s(span.doc(), span.start(), span.end());
}
public String s(int doc, int start, int end) {
return "s(" + doc + "," + start + "," + end +")";
}

View File

@ -106,7 +106,7 @@ public class TestNearSpansOrdered extends LuceneTestCase {
}
public String s(Spans span) {
return s(span.doc(), span.start(), span.end());
return s(span.docID(), span.startPosition(), span.endPosition());
}
public String s(int doc, int start, int end) {
return "s(" + doc + "," + start + "," + end +")";
@ -114,12 +114,10 @@ public class TestNearSpansOrdered extends LuceneTestCase {
public void testNearSpansNext() throws Exception {
SpanNearQuery q = makeQuery();
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(true, span.next());
assertEquals(s(0,0,3), s(span));
assertEquals(true, span.next());
assertEquals(s(1,0,4), s(span));
assertEquals(false, span.next());
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
TestSpans.tstNextSpans(span,0,0,3);
TestSpans.tstNextSpans(span,1,0,4);
TestSpans.tstEndSpans(span);
}
/**
@ -127,51 +125,58 @@ public class TestNearSpansOrdered extends LuceneTestCase {
* same as next -- it's only applicable in this case since we know doc
* does not contain more than one span
*/
public void testNearSpansSkipToLikeNext() throws Exception {
public void testNearSpansAdvanceLikeNext() throws Exception {
SpanNearQuery q = makeQuery();
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(true, span.skipTo(0));
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
assertEquals(0, span.advance(0));
assertEquals(0, span.nextStartPosition());
assertEquals(s(0,0,3), s(span));
assertEquals(true, span.skipTo(1));
assertEquals(1, span.advance(1));
assertEquals(0, span.nextStartPosition());
assertEquals(s(1,0,4), s(span));
assertEquals(false, span.skipTo(2));
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
}
public void testNearSpansNextThenSkipTo() throws Exception {
public void testNearSpansNextThenAdvance() throws Exception {
SpanNearQuery q = makeQuery();
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(true, span.next());
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc());
assertEquals(0, span.nextStartPosition());
assertEquals(s(0,0,3), s(span));
assertEquals(true, span.skipTo(1));
assertNotSame(Spans.NO_MORE_DOCS, span.advance(1));
assertEquals(0, span.nextStartPosition());
assertEquals(s(1,0,4), s(span));
assertEquals(false, span.next());
assertEquals(Spans.NO_MORE_DOCS, span.nextDoc());
}
public void testNearSpansNextThenSkipPast() throws Exception {
public void testNearSpansNextThenAdvancePast() throws Exception {
SpanNearQuery q = makeQuery();
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(true, span.next());
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc());
assertEquals(0, span.nextStartPosition());
assertEquals(s(0,0,3), s(span));
assertEquals(false, span.skipTo(2));
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
}
public void testNearSpansSkipPast() throws Exception {
public void testNearSpansAdvancePast() throws Exception {
SpanNearQuery q = makeQuery();
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(false, span.skipTo(2));
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
}
public void testNearSpansSkipTo0() throws Exception {
public void testNearSpansAdvanceTo0() throws Exception {
SpanNearQuery q = makeQuery();
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(true, span.skipTo(0));
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
assertEquals(0, span.advance(0));
assertEquals(0, span.nextStartPosition());
assertEquals(s(0,0,3), s(span));
}
public void testNearSpansSkipTo1() throws Exception {
public void testNearSpansAdvanceTo1() throws Exception {
SpanNearQuery q = makeQuery();
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
assertEquals(true, span.skipTo(1));
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
assertEquals(1, span.advance(1));
assertEquals(0, span.nextStartPosition());
assertEquals(s(1,0,4), s(span));
}

View File

@ -67,12 +67,12 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq;
Spans spans;
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
spans = MultiSpansWrapper.wrap(indexReader.getContext(), stq);
spans = MultiSpansWrapper.wrap(indexReader, stq);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 100, 1, 1, 1);
stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
spans = MultiSpansWrapper.wrap(indexReader.getContext(), stq);
spans = MultiSpansWrapper.wrap(indexReader, stq);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 100, 0, 0, 0);
}
@ -83,7 +83,7 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanFirstQuery sfq;
match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
sfq = new SpanFirstQuery(match, 2);
Spans spans = MultiSpansWrapper.wrap(indexReader.getContext(), sfq);
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq);
checkSpans(spans, 109, 1, 1, 1);
//Test more complicated subclause
SpanQuery[] clauses = new SpanQuery[2];
@ -91,11 +91,11 @@ public class TestPayloadSpans extends LuceneTestCase {
clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
match = new SpanNearQuery(clauses, 0, true);
sfq = new SpanFirstQuery(match, 2);
checkSpans(MultiSpansWrapper.wrap(indexReader.getContext(), sfq), 100, 2, 1, 1);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
match = new SpanNearQuery(clauses, 0, false);
sfq = new SpanFirstQuery(match, 2);
checkSpans(MultiSpansWrapper.wrap(indexReader.getContext(), sfq), 100, 2, 1, 1);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
}
@ -119,7 +119,7 @@ public class TestPayloadSpans extends LuceneTestCase {
writer.close();
checkSpans(MultiSpansWrapper.wrap(reader.getContext(), snq), 1,new int[]{2});
checkSpans(MultiSpansWrapper.wrap(reader, snq), 1,new int[]{2});
reader.close();
directory.close();
}
@ -129,10 +129,8 @@ public class TestPayloadSpans extends LuceneTestCase {
Spans spans;
IndexSearcher searcher = getSearcher();
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), stq);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 0, null);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq);
assertNull(spans);
SpanQuery[] clauses = new SpanQuery[3];
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
@ -140,7 +138,7 @@ public class TestPayloadSpans extends LuceneTestCase {
clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false);
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 2, new int[]{3,3});
@ -151,7 +149,7 @@ public class TestPayloadSpans extends LuceneTestCase {
spanNearQuery = new SpanNearQuery(clauses, 6, true);
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 1, new int[]{3});
@ -174,7 +172,7 @@ public class TestPayloadSpans extends LuceneTestCase {
// yy within 6 of xx within 6 of rr
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 2, new int[]{3,3});
closeIndexReader.close();
@ -205,7 +203,7 @@ public class TestPayloadSpans extends LuceneTestCase {
clauses3[1] = snq;
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 1, new int[]{3});
@ -243,7 +241,7 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 2, new int[]{8, 8});
closeIndexReader.close();
@ -267,16 +265,18 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.next()) {
Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
}
}
}
}
@ -303,15 +303,18 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.next()) {
Collection<byte[]> payloads = spans.getPayload();
for (final byte[] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
}
}
}
}
@ -338,16 +341,18 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.next()) {
Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
}
}
}
}
@ -395,31 +400,22 @@ public class TestPayloadSpans extends LuceneTestCase {
//each position match should have a span associated with it, since there is just one underlying term query, there should
//only be one entry in the span
int seen = 0;
while (spans.next() == true)
{
//if we expect payloads, then isPayloadAvailable should be true
if (expectedNumPayloads > 0) {
assertTrue("isPayloadAvailable is not returning the correct value: " + spans.isPayloadAvailable()
+ " and it should be: " + (expectedNumPayloads > 0),
spans.isPayloadAvailable() == true);
} else {
assertTrue("isPayloadAvailable should be false", spans.isPayloadAvailable() == false);
}
//See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
assertTrue("payload Size: " + payload.size() + " is not: " + expectedNumPayloads, payload.size() == expectedNumPayloads);
for (final byte [] thePayload : payload) {
assertTrue("payload[0] Size: " + thePayload.length + " is not: " + expectedPayloadLength,
thePayload.length == expectedPayloadLength);
assertTrue(thePayload[0] + " does not equal: " + expectedFirstByte, thePayload[0] == expectedFirstByte);
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
assertEquals("isPayloadAvailable should return true/false as payloads are expected", expectedNumPayloads > 0, spans.isPayloadAvailable());
//See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
assertEquals("payload size", expectedNumPayloads, payload.size());
for (final byte [] thePayload : payload) {
assertEquals("payload length", expectedPayloadLength, thePayload.length);
assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
}
}
seen++;
}
seen++;
}
assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans);
assertEquals("expectedNumSpans", expectedNumSpans, seen);
}
private IndexSearcher getSearcher() throws Exception {
@ -446,27 +442,28 @@ public class TestPayloadSpans extends LuceneTestCase {
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
int cnt = 0;
while (spans.next() == true) {
if(VERBOSE)
System.out.println("\nSpans Dump --");
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
if(VERBOSE) {
System.out.println("payloads for span:" + payload.size());
for (final byte [] bytes : payload) {
System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
+ new String(bytes, StandardCharsets.UTF_8));
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if(VERBOSE)
System.out.println("\nSpans Dump --");
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
if(VERBOSE) {
System.out.println("payloads for span:" + payload.size());
for (final byte [] bytes : payload) {
System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
+ new String(bytes, StandardCharsets.UTF_8));
}
}
assertEquals("payload size", numPayloads[cnt], payload.size());
} else { // no payload available
assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0", numPayloads.length > 0 && numPayloads[cnt] > 0 );
}
assertEquals(numPayloads[cnt],payload.size());
} else {
assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0",numPayloads.length > 0 && numPayloads[cnt] > 0 );
cnt++;
}
cnt++;
}
assertEquals(numSpans, cnt);
assertEquals("expected numSpans", numSpans, cnt);
}
final class PayloadAnalyzer extends Analyzer {

View File

@ -22,7 +22,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.IndexWriter;
@ -201,117 +200,55 @@ public class TestSpans extends LuceneTestCase {
makeSpanTermQuery("t3") },
slop,
ordered);
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), snq);
assertTrue("first range", spans.next());
assertEquals("first doc", 11, spans.doc());
assertEquals("first start", 0, spans.start());
assertEquals("first end", 4, spans.end());
assertEquals("first doc", 11, spans.nextDoc());
assertEquals("first start", 0, spans.nextStartPosition());
assertEquals("first end", 4, spans.endPosition());
assertTrue("second range", spans.next());
assertEquals("second doc", 11, spans.doc());
assertEquals("second start", 2, spans.start());
assertEquals("second end", 6, spans.end());
assertEquals("second start", 2, spans.nextStartPosition());
assertEquals("second end", 6, spans.endPosition());
assertFalse("third range", spans.next());
tstEndSpans(spans);
}
public void testSpanNearUnOrdered() throws Exception {
//See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test
SpanNearQuery snq;
snq = new SpanNearQuery(
SpanNearQuery senq;
senq = new SpanNearQuery(
new SpanQuery[] {
makeSpanTermQuery("u1"),
makeSpanTermQuery("u2") },
0,
false);
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 4, spans.doc());
assertEquals("start", 1, spans.start());
assertEquals("end", 3, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 5, spans.doc());
assertEquals("start", 2, spans.start());
assertEquals("end", 4, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 8, spans.doc());
assertEquals("start", 2, spans.start());
assertEquals("end", 4, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 9, spans.doc());
assertEquals("start", 0, spans.start());
assertEquals("end", 2, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 10, spans.doc());
assertEquals("start", 0, spans.start());
assertEquals("end", 2, spans.end());
assertTrue("Has next and it shouldn't: " + spans.doc(), spans.next() == false);
Spans spans = MultiSpansWrapper.wrap(reader, senq);
tstNextSpans(spans, 4, 1, 3);
tstNextSpans(spans, 5, 2, 4);
tstNextSpans(spans, 8, 2, 4);
tstNextSpans(spans, 9, 0, 2);
tstNextSpans(spans, 10, 0, 2);
tstEndSpans(spans);
SpanNearQuery u1u2 = new SpanNearQuery(new SpanQuery[]{makeSpanTermQuery("u1"),
makeSpanTermQuery("u2")}, 0, false);
snq = new SpanNearQuery(
senq = new SpanNearQuery(
new SpanQuery[] {
u1u2,
makeSpanTermQuery("u2")
},
1,
false);
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 4, spans.doc());
assertEquals("start", 0, spans.start());
assertEquals("end", 3, spans.end());
assertTrue("Does not have next and it should", spans.next());
//unordered spans can be subsets
assertEquals("doc", 4, spans.doc());
assertEquals("start", 1, spans.start());
assertEquals("end", 3, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 5, spans.doc());
assertEquals("start", 0, spans.start());
assertEquals("end", 4, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 5, spans.doc());
assertEquals("start", 2, spans.start());
assertEquals("end", 4, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 8, spans.doc());
assertEquals("start", 0, spans.start());
assertEquals("end", 4, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 8, spans.doc());
assertEquals("start", 2, spans.start());
assertEquals("end", 4, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 9, spans.doc());
assertEquals("start", 0, spans.start());
assertEquals("end", 2, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 9, spans.doc());
assertEquals("start", 0, spans.start());
assertEquals("end", 4, spans.end());
assertTrue("Does not have next and it should", spans.next());
assertEquals("doc", 10, spans.doc());
assertEquals("start", 0, spans.start());
assertEquals("end", 2, spans.end());
assertTrue("Has next and it shouldn't", spans.next() == false);
spans = MultiSpansWrapper.wrap(reader, senq);
tstNextSpans(spans, 4, 0, 3);
tstNextSpans(spans, 4, 1, 3); // unordered spans can be subsets
tstNextSpans(spans, 5, 0, 4);
tstNextSpans(spans, 5, 2, 4);
tstNextSpans(spans, 8, 0, 4);
tstNextSpans(spans, 8, 2, 4);
tstNextSpans(spans, 9, 0, 2);
tstNextSpans(spans, 9, 0, 4);
tstNextSpans(spans, 10, 0, 2);
tstEndSpans(spans);
}
@ -321,21 +258,40 @@ public class TestSpans extends LuceneTestCase {
for (int i = 0; i < terms.length; i++) {
sqa[i] = makeSpanTermQuery(terms[i]);
}
return MultiSpansWrapper.wrap(searcher.getTopReaderContext(), new SpanOrQuery(sqa));
return MultiSpansWrapper.wrap(searcher.getIndexReader(), new SpanOrQuery(sqa));
}
private void tstNextSpans(Spans spans, int doc, int start, int end)
throws Exception {
assertTrue("next", spans.next());
assertEquals("doc", doc, spans.doc());
assertEquals("start", start, spans.start());
assertEquals("end", end, spans.end());
/**
 * Asserts that the next match produced by {@code spans} lies in document {@code doc}
 * and covers the positions {@code start} (inclusive start) to {@code end}.
 * <p>
 * When {@code spans} is still positioned before {@code doc}, the positions of the
 * current document (if any) are first asserted to be exhausted and {@code nextDoc()}
 * is asserted to land on {@code doc}. Passing {@link Spans#NO_MORE_DOCS} as
 * {@code doc} asserts exhaustion; {@code start} and {@code end} are then not checked.
 *
 * @param spans the spans under test
 * @param doc the expected document id, or {@link Spans#NO_MORE_DOCS}
 * @param start the expected start position of the next match
 * @param end the expected end position of the next match
 * @throws IOException if the underlying spans fail while iterating
 */
public static void tstNextSpans(Spans spans, int doc, int start, int end) throws IOException {
  final boolean expectMatch = doc != Spans.NO_MORE_DOCS;

  if (spans.docID() < doc) { // nextDoc needed before testing start/end
    if (spans.docID() >= 0) {
      // Already on an earlier document: it must have no positions left.
      assertEquals("nextStartPosition of previous doc", Spans.NO_MORE_POSITIONS, spans.nextStartPosition());
      assertEquals("endPosition of previous doc", Spans.NO_MORE_POSITIONS, spans.endPosition());
    }
    assertEquals("nextDoc", doc, spans.nextDoc());
    if (expectMatch) {
      // Freshly positioned on a document: no position has been requested yet.
      assertEquals("first startPosition", -1, spans.startPosition());
      assertEquals("first endPosition", -1, spans.endPosition());
    }
  } else {
    assertEquals("docId", doc, spans.docID());
  }

  if (!expectMatch) {
    return;
  }
  assertEquals("nextStartPosition", start, spans.nextStartPosition());
  assertEquals("startPosition", start, spans.startPosition());
  assertEquals("endPosition", end, spans.endPosition());
}
/**
 * Asserts that {@code spans} has no further matches.
 * A {@code null} argument is treated as empty spans and passes trivially.
 *
 * @param spans the spans under test, may be {@code null}
 */
public static void tstEndSpans(Spans spans) throws Exception {
  if (spans == null) { // null Spans is empty
    return;
  }
  tstNextSpans(spans, Spans.NO_MORE_DOCS, -2, -2); // start and end positions will be ignored
}
public void testSpanOrEmpty() throws Exception {
Spans spans = orSpans(new String[0]);
assertFalse("empty next", spans.next());
tstEndSpans(spans);
SpanOrQuery a = new SpanOrQuery();
SpanOrQuery b = new SpanOrQuery();
assertTrue("empty should equal", a.equals(b));
@ -344,24 +300,7 @@ public class TestSpans extends LuceneTestCase {
public void testSpanOrSingle() throws Exception {
Spans spans = orSpans(new String[] {"w5"});
tstNextSpans(spans, 0, 4, 5);
assertFalse("final next", spans.next());
}
public void testSpanOrMovesForward() throws Exception {
Spans spans = orSpans(new String[] {"w1", "xx"});
spans.next();
int doc = spans.doc();
assertEquals(0, doc);
spans.skipTo(0);
doc = spans.doc();
// LUCENE-1583:
// according to Spans, a skipTo to the same doc or less
// should still call next() on the underlying Spans
assertEquals(1, doc);
tstEndSpans(spans);
}
public void testSpanOrDouble() throws Exception {
@ -370,17 +309,15 @@ public class TestSpans extends LuceneTestCase {
tstNextSpans(spans, 2, 3, 4);
tstNextSpans(spans, 3, 4, 5);
tstNextSpans(spans, 7, 3, 4);
assertFalse("final next", spans.next());
tstEndSpans(spans);
}
public void testSpanOrDoubleSkip() throws Exception {
public void testSpanOrDoubleAdvance() throws Exception {
Spans spans = orSpans(new String[] {"w5", "yy"});
assertTrue("initial skipTo", spans.skipTo(3));
assertEquals("doc", 3, spans.doc());
assertEquals("start", 4, spans.start());
assertEquals("end", 5, spans.end());
assertEquals("initial advance", 3, spans.advance(3));
tstNextSpans(spans, 3, 4, 5);
tstNextSpans(spans, 7, 3, 4);
assertFalse("final next", spans.next());
tstEndSpans(spans);
}
public void testSpanOrUnused() throws Exception {
@ -389,7 +326,7 @@ public class TestSpans extends LuceneTestCase {
tstNextSpans(spans, 2, 3, 4);
tstNextSpans(spans, 3, 4, 5);
tstNextSpans(spans, 7, 3, 4);
assertFalse("final next", spans.next());
tstEndSpans(spans);
}
public void testSpanOrTripleSameDoc() throws Exception {
@ -400,7 +337,7 @@ public class TestSpans extends LuceneTestCase {
tstNextSpans(spans, 11, 3, 4);
tstNextSpans(spans, 11, 4, 5);
tstNextSpans(spans, 11, 5, 6);
assertFalse("final next", spans.next());
tstEndSpans(spans);
}
public void testSpanScorerZeroSloppyFreq() throws Exception {
@ -542,11 +479,15 @@ public class TestSpans extends LuceneTestCase {
SpanTermQuery iq = new SpanTermQuery(new Term(field, include));
SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude));
SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post);
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), snq);
int i = 0;
while (spans.next()){
i++;
if (spans != null) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS){
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
i++;
}
}
}
return i;
}

View File

@ -0,0 +1,187 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Tests Spans (v2): checks which documents {@link SpanOrQuery} and {@link SpanNotQuery}
 * match in a small index whose documents are numbers spelled out via
 * {@code English.intToEnglish}.
 */
public class TestSpansEnum extends LuceneTestCase {
  private static IndexSearcher searcher;
  private static IndexReader reader;
  private static Directory directory;

  /**
   * Token filter that sets a {@code "pos: N"} payload (UTF-8) on every token,
   * where N counts the tokens seen since the last {@link #reset()}.
   */
  static final class SimplePayloadFilter extends TokenFilter {
    int pos;
    final PayloadAttribute payloadAttr;
    final CharTermAttribute termAttr;

    public SimplePayloadFilter(TokenStream input) {
      super(input);
      pos = 0;
      payloadAttr = input.addAttribute(PayloadAttribute.class);
      termAttr = input.addAttribute(CharTermAttribute.class);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (!input.incrementToken()) {
        return false;
      }
      final byte[] payloadBytes = ("pos: " + pos).getBytes(StandardCharsets.UTF_8);
      payloadAttr.setPayload(new BytesRef(payloadBytes));
      pos++;
      return true;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      pos = 0;
    }
  }

  static Analyzer simplePayloadAnalyzer;

  /** Builds the shared index, reader and searcher used by every test in this class. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    simplePayloadAnalyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(source, new SimplePayloadFilter(source));
      }
    };

    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
        newIndexWriterConfig(simplePayloadAnalyzer)
            .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
    //writer.infoStream = System.out;
    addEnglishDocs(writer, 0, 10);
    addEnglishDocs(writer, 100, 110); // doc id 10-19 have 100-109

    reader = writer.getReader();
    searcher = newSearcher(reader);
    writer.close();
  }

  /**
   * Adds one document per number in {@code [from, to)}, each with a single stored
   * text field named "field" holding {@code English.intToEnglish(number)}.
   */
  private static void addEnglishDocs(RandomIndexWriter writer, int from, int to) throws IOException {
    for (int number = from; number < to; number++) {
      Document doc = new Document();
      doc.add(newTextField("field", English.intToEnglish(number), Field.Store.YES));
      writer.addDocument(doc);
    }
  }

  /** Closes the shared reader and directory and clears all static state. */
  @AfterClass
  public static void afterClass() throws Exception {
    reader.close();
    directory.close();
    searcher = null;
    reader = null;
    directory = null;
    simplePayloadAnalyzer = null;
  }

  /** Runs {@code query} against the shared searcher and checks the hits against {@code results}. */
  private void checkHits(Query query, int[] results) throws IOException {
    CheckHits.checkHits(random(), query, "field", searcher, results);
  }

  /** Creates a span term query for {@code term} on the field "field". */
  SpanTermQuery spanTQ(String term) {
    return new SpanTermQuery(new Term("field", term));
  }

  @Test
  public void testSpansEnumOr1() throws Exception {
    SpanOrQuery oneOrTwo = new SpanOrQuery(spanTQ("one"), spanTQ("two"));
    checkHits(oneOrTwo, new int[] {1, 2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19});
  }

  @Test
  public void testSpansEnumOr2() throws Exception {
    SpanOrQuery oneOrEleven = new SpanOrQuery(spanTQ("one"), spanTQ("eleven"));
    checkHits(oneOrEleven, new int[] {1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19});
  }

  @Test
  public void testSpansEnumOr3() throws Exception {
    SpanOrQuery twelveOrEleven = new SpanOrQuery(spanTQ("twelve"), spanTQ("eleven"));
    checkHits(twelveOrEleven, new int[] {});
  }

  @Test
  public void testSpansEnumOrNot1() throws Exception {
    // The same "one" query instance serves as an OR clause and as the exclusion.
    SpanTermQuery one = spanTQ("one");
    SpanOrQuery oneOrTwo = new SpanOrQuery(one, spanTQ("two"));
    SpanNotQuery withoutOne = new SpanNotQuery(oneOrTwo, one);
    checkHits(withoutOne, new int[] {2,12});
  }

  @Test
  public void testSpansEnumNotBeforeAfter1() throws Exception {
    SpanTermQuery one = spanTQ("one");
    SpanTermQuery hundred = spanTQ("hundred");
    SpanNotQuery hundredNotOne = new SpanNotQuery(hundred, one, 0, 0);
    checkHits(hundredNotOne, new int[] {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}); // include all "one hundred ..."
  }

  @Test
  public void testSpansEnumNotBeforeAfter2() throws Exception {
    SpanTermQuery one = spanTQ("one");
    SpanTermQuery hundred = spanTQ("hundred");
    SpanNotQuery hundredNotOne = new SpanNotQuery(hundred, one, 1, 0);
    checkHits(hundredNotOne, new int[] {}); // exclude all "one hundred ..."
  }

  @Test
  public void testSpansEnumNotBeforeAfter3() throws Exception {
    SpanTermQuery one = spanTQ("one");
    SpanTermQuery hundred = spanTQ("hundred");
    SpanNotQuery hundredNotOne = new SpanNotQuery(hundred, one, 0, 1);
    checkHits(hundredNotOne, new int[] {10, 12, 13, 14, 15, 16, 17, 18, 19}); // exclude "one hundred one"
  }
}

View File

@ -308,10 +308,11 @@ public class WeightedSpanTermExtractor {
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
// collect span positions
while (spans.next()) {
spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
}
}
}
if (spanPositions.size() == 0) {

View File

@ -681,7 +681,7 @@ public class TestMultiTermHighlighting extends LuceneTestCase {
}
};
SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
Query query = new SpanNearQuery(new SpanQuery[] { childQuery }, 0, true);
Query query = new SpanNearQuery(new SpanQuery[] { childQuery, childQuery }, 0, false);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
String snippets[] = highlighter.highlight("body", query, searcher, topDocs);