mirror of https://github.com/apache/lucene.git
LUCENE-6308: cutover Spans to DISI, reuse ConjunctionDISI, use two-phased iteration
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670272 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 03f4970074
commit d3cfba9b29
@@ -32,6 +32,15 @@ API Changes

* LUCENE-6067: Accountable.getChildResources has a default
  implementation returning the empty list. (Robert Muir)

======================= Lucene 5.2.0 =======================

New Features

* LUCENE-6308: Span queries now share document conjunction/intersection
  code with boolean queries, and use two-phased iterators for
  faster intersection by avoiding loading positions in certain cases.
  (Paul Elschot, Robert Muir via Mike McCandless)

======================= Lucene 5.1.0 =======================

New Features

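For context, the two-phased iteration mentioned in the entry above separates a cheap document-level approximation from the costly positional check. A minimal consumer-side sketch, assuming the TwoPhaseIterator and DocIdSetIterator APIs that appear in the hunks below (the collect() callback is hypothetical):

    DocIdSetIterator approximation = twoPhaseIterator.approximation();
    for (int doc = approximation.nextDoc();
         doc != DocIdSetIterator.NO_MORE_DOCS;
         doc = approximation.nextDoc()) {
      if (twoPhaseIterator.matches()) { // positions are read only for candidate docs
        collect(doc);                   // hypothetical consumer callback
      }
    }

Documents rejected without calling matches() never have their positions loaded, which is where the speedup for span intersections comes from.
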
@@ -23,8 +23,14 @@ import java.util.Comparator;
import java.util.List;

import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.search.spans.Spans;

class ConjunctionDISI extends DocIdSetIterator {
/** A conjunction of DocIdSetIterators.
 * This iterates over the doc ids that are present in each given DocIdSetIterator.
 * <br>Public only for use in {@link org.apache.lucene.search.spans}.
 * @lucene.internal
 */
public class ConjunctionDISI extends DocIdSetIterator {

  /** Create a conjunction over the provided iterators, taking advantage of
   * {@link TwoPhaseIterator}. */

@@ -32,18 +38,16 @@ class ConjunctionDISI extends DocIdSetIterator {
    final List<DocIdSetIterator> allIterators = new ArrayList<>();
    final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
    for (DocIdSetIterator iterator : iterators) {
      if (iterator instanceof Scorer) {
        // if we have a scorer, check if it supports two-phase iteration
        TwoPhaseIterator twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
        if (twoPhaseIterator != null) {
          // Note:
          allIterators.add(twoPhaseIterator.approximation());
          twoPhaseIterators.add(twoPhaseIterator);
        } else {
          allIterators.add(iterator);
        }
      } else {
        // no approximation support, use the iterator as-is
      TwoPhaseIterator twoPhaseIterator = null;
      if (iterator instanceof Scorer) {
        twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
      } else if (iterator instanceof Spans) {
        twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
      }
      if (twoPhaseIterator != null) {
        allIterators.add(twoPhaseIterator.approximation());
        twoPhaseIterators.add(twoPhaseIterator);
      } else { // no approximation support, use the iterator as-is
        allIterators.add(iterator);
      }
    }

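The hunk above shows how ConjunctionDISI.intersect() now accepts Spans clauses as well as Scorers, collecting each clause's approximation together with its TwoPhaseIterator when one is available. A rough sketch of the second phase, assuming the same TwoPhaseIterator API (an illustration only, not the actual ConjunctionDISI internals):

    // Once all approximations agree on a candidate document,
    // confirm it against each clause's positional check.
    static boolean allClausesMatch(List<TwoPhaseIterator> twoPhaseIterators) throws IOException {
      for (TwoPhaseIterator tpi : twoPhaseIterators) {
        if (tpi.matches() == false) {
          return false; // reject early; the remaining clauses never load positions
        }
      }
      return true;
    }
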
@@ -26,7 +26,6 @@ import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;

@@ -71,7 +70,7 @@ public class PayloadNearQuery extends SpanNearQuery {
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new PayloadNearSpanWeight(this, searcher);
  }

@@ -113,7 +112,7 @@ public class PayloadNearQuery extends SpanNearQuery {
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    int result = super.hashCode() ^ getClass().hashCode();
    result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode());
    result = prime * result + ((function == null) ? 0 : function.hashCode());
    return result;

@@ -149,8 +148,10 @@ public class PayloadNearQuery extends SpanNearQuery {

    @Override
    public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
      return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
          similarity, similarity.simScorer(stats, context));
      Spans spans = query.getSpans(context, acceptDocs, termContexts);
      return (spans == null)
          ? null
          : new PayloadNearSpanScorer(spans, this, similarity, similarity.simScorer(stats, context));
    }

    @Override

@@ -188,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery {
    protected float payloadScore;
    private int payloadsSeen;

    protected PayloadNearSpanScorer(Spans spans, Weight weight,
    protected PayloadNearSpanScorer(Spans spans, SpanWeight weight,
        Similarity similarity, Similarity.SimScorer docScorer) throws IOException {
      super(spans, weight, docScorer);
      this.spans = spans;

@@ -200,13 +201,13 @@ public class PayloadNearQuery extends SpanNearQuery {
        if (subSpans[i] instanceof NearSpansOrdered) {
          if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
            processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(),
                subSpans[i].start(), subSpans[i].end());
                subSpans[i].startPosition(), subSpans[i].endPosition());
          }
          getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
        } else if (subSpans[i] instanceof NearSpansUnordered) {
          if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
            processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(),
                subSpans[i].start(), subSpans[i].end());
                subSpans[i].startPosition(), subSpans[i].endPosition());
          }
          getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
        }

@@ -233,7 +234,7 @@ public class PayloadNearQuery extends SpanNearQuery {
        scratch.length = thePayload.length;
        payloadScore = function.currentScore(doc, fieldName, start, end,
            payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
                spans.start(), spans.end(), scratch));
                spans.startPosition(), spans.endPosition(), scratch));
        ++payloadsSeen;
      }
    }

@@ -241,22 +242,20 @@ public class PayloadNearQuery extends SpanNearQuery {
    //
    @Override
    protected boolean setFreqCurrentDoc() throws IOException {
      if (!more) {
        return false;
      }
      doc = spans.doc();
      freq = 0.0f;
      payloadScore = 0;
      payloadsSeen = 0;
      do {
        int matchLength = spans.end() - spans.start();
        freq += docScorer.computeSlopFactor(matchLength);
        Spans[] spansArr = new Spans[1];
        spansArr[0] = spans;
        getPayloads(spansArr);
        more = spans.next();
      } while (more && (doc == spans.doc()));
      return true;
      freq = 0.0f;
      payloadScore = 0;
      payloadsSeen = 0;
      int startPos = spans.nextStartPosition();
      assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
      do {
        int matchLength = spans.endPosition() - startPos;
        freq += docScorer.computeSlopFactor(matchLength);
        Spans[] spansArr = new Spans[1];
        spansArr[0] = spans;
        getPayloads(spansArr);
        startPos = spans.nextStartPosition();
      } while (startPos != Spans.NO_MORE_POSITIONS);
      return true;
    }

    @Override

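One behavioral point worth noting from the scorer() hunk above: getSpans() may now return null for a segment with no matching spans, and the weight then returns a null Scorer instead of constructing one. An illustrative caller-side sketch (not part of the commit, variable names are made up), mirroring the null check that explain() performs further below:

    Scorer scorer = weight.scorer(leafContext, acceptDocs);
    if (scorer == null) {
      return; // nothing matches in this segment, so there is nothing to score
    }
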
@@ -169,7 +169,7 @@ public class PayloadSpanUtil {
      final boolean inorder = (slop == 0);

      SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps,
                                           inorder);
          inorder);
      sp.setBoost(query.getBoost());
      getPayloads(payloads, sp);
    }

@@ -186,11 +186,15 @@ public class PayloadSpanUtil {
    }
    for (LeafReaderContext leafReaderContext : context.leaves()) {
      final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts);
      while (spans.next() == true) {
        if (spans.isPayloadAvailable()) {
          Collection<byte[]> payload = spans.getPayload();
          for (byte [] bytes : payload) {
            payloads.add(bytes);
      if (spans != null) {
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
          while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            if (spans.isPayloadAvailable()) {
              Collection<byte[]> payload = spans.getPayload();
              for (byte [] bytes : payload) {
                payloads.add(bytes);
              }
            }
          }
        }
      }

@@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads;
 */

import java.io.IOException;
import java.util.Objects;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;

@@ -26,10 +27,10 @@ import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;

@@ -60,14 +61,14 @@ public class PayloadTermQuery extends SpanTermQuery {
  }

  public PayloadTermQuery(Term term, PayloadFunction function,
                          boolean includeSpanScore) {
      boolean includeSpanScore) {
    super(term);
    this.function = function;
    this.function = Objects.requireNonNull(function);
    this.includeSpanScore = includeSpanScore;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new PayloadTermWeight(this, searcher);
  }

@@ -79,9 +80,11 @@ public class PayloadTermQuery extends SpanTermQuery {
    }

    @Override
    public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
      return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
          this, similarity.simScorer(stats, context));
    public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
      TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts);
      return (spans == null)
          ? null
          : new PayloadTermSpanScorer(spans, this, similarity.simScorer(stats, context));
    }

    protected class PayloadTermSpanScorer extends SpanScorer {

@@ -90,45 +93,42 @@ public class PayloadTermQuery extends SpanTermQuery {
      protected int payloadsSeen;
      private final TermSpans termSpans;

      public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
      public PayloadTermSpanScorer(TermSpans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
        super(spans, weight, docScorer);
        termSpans = spans;
        termSpans = spans; // CHECKME: generics to use SpansScorer.spans as TermSpans.
      }

      @Override
      protected boolean setFreqCurrentDoc() throws IOException {
        if (!more) {
          return false;
        }
        doc = spans.doc();
        freq = 0.0f;
        numMatches = 0;
        payloadScore = 0;
        payloadsSeen = 0;
        while (more && doc == spans.doc()) {
          int matchLength = spans.end() - spans.start();
        int startPos = spans.nextStartPosition();
        assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
        do {
          int matchLength = spans.endPosition() - startPos;

          freq += docScorer.computeSlopFactor(matchLength);
          numMatches++;
          processPayload(similarity);

          more = spans.next();// this moves positions to the next match in this
                              // document
        }
        return more || (freq != 0);
          startPos = spans.nextStartPosition();
        } while (startPos != Spans.NO_MORE_POSITIONS);
        return freq != 0;
      }

      protected void processPayload(Similarity similarity) throws IOException {
        if (termSpans.isPayloadAvailable()) {
        if (spans.isPayloadAvailable()) {
          final PostingsEnum postings = termSpans.getPostings();
          payload = postings.getPayload();
          if (payload != null) {
            payloadScore = function.currentScore(doc, term.field(),
                spans.start(), spans.end(), payloadsSeen, payloadScore,
                docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
                spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
                docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
          } else {
            payloadScore = function.currentScore(doc, term.field(),
                spans.start(), spans.end(), payloadsSeen, payloadScore, 1F);
                spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
          }
          payloadsSeen++;

@@ -176,7 +176,7 @@ public class PayloadTermQuery extends SpanTermQuery {

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs());
      PayloadTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
      if (scorer != null) {
        int newDoc = scorer.advance(doc);
        if (newDoc == doc) {

@@ -220,7 +220,7 @@ public class PayloadTermQuery extends SpanTermQuery {
    public int hashCode() {
      final int prime = 31;
      int result = super.hashCode();
      result = prime * result + ((function == null) ? 0 : function.hashCode());
      result = prime * result + function.hashCode();
      result = prime * result + (includeSpanScore ? 1231 : 1237);
      return result;
    }

@@ -234,14 +234,9 @@ public class PayloadTermQuery extends SpanTermQuery {
    if (getClass() != obj.getClass())
      return false;
    PayloadTermQuery other = (PayloadTermQuery) obj;
    if (function == null) {
      if (other.function != null)
        return false;
    } else if (!function.equals(other.function))
      return false;
    if (includeSpanScore != other.includeSpanScore)
      return false;
    return true;
    return function.equals(other.function);
  }

}

@@ -106,7 +106,7 @@ public class FieldMaskingSpanQuery extends SpanQuery {
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return maskedQuery.createWeight(searcher, needsScores);
  }

@@ -19,10 +19,13 @@ package org.apache.lucene.search.spans;

import java.io.IOException;
import java.util.Collection;
import java.util.Objects;

import org.apache.lucene.search.TwoPhaseIterator;

/**
 * A {@link Spans} implementation which allows wrapping another spans instance
 * and override some selected methods.
 * A {@link Spans} implementation wrapping another spans instance,
 * allowing to override selected methods in a subclass.
 */
public class FilterSpans extends Spans {

@@ -31,32 +34,37 @@ public class FilterSpans extends Spans {

  /** Wrap the given {@link Spans}. */
  public FilterSpans(Spans in) {
    this.in = in;
    this.in = Objects.requireNonNull(in);
  }

  @Override
  public boolean next() throws IOException {
    return in.next();
  public int nextDoc() throws IOException {
    return in.nextDoc();
  }

  @Override
  public boolean skipTo(int target) throws IOException {
    return in.skipTo(target);
  public int advance(int target) throws IOException {
    return in.advance(target);
  }

  @Override
  public int doc() {
    return in.doc();
  public int docID() {
    return in.docID();
  }

  @Override
  public int start() {
    return in.start();
  public int nextStartPosition() throws IOException {
    return in.nextStartPosition();
  }

  @Override
  public int end() {
    return in.end();
  public int startPosition() {
    return in.startPosition();
  }

  @Override
  public int endPosition() {
    return in.endPosition();
  }

  @Override

@@ -79,4 +87,8 @@ public class FilterSpans extends Spans {
    return "Filter(" + in.toString() + ")";
  }

  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    return in.asTwoPhaseIterator();
  }
}

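As a rough illustration of how the wrapper above is meant to be used (this example is not part of the commit; the class name and the position check are invented, and the usual imports such as java.io.IOException are assumed), a subclass forwards everything to the wrapped Spans and only overrides the methods it cares about:

    // Hypothetical subclass: skip matches that start before a given position.
    class MinStartPositionSpans extends FilterSpans {
      private final int minStart;

      MinStartPositionSpans(Spans in, int minStart) {
        super(in);
        this.minStart = minStart;
      }

      @Override
      public int nextStartPosition() throws IOException {
        int start;
        do {
          start = super.nextStartPosition(); // delegate to the wrapped Spans
        } while (start != NO_MORE_POSITIONS && start < minStart);
        return start;
      }
    }
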
@@ -0,0 +1,103 @@
package org.apache.lucene.search.spans;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.ConjunctionDISI;
import org.apache.lucene.search.TwoPhaseIterator;

import java.io.IOException;
import java.util.List;
import java.util.Objects;

/**
 * Common super class for un/ordered Spans
 */
abstract class NearSpans extends Spans {
  SpanNearQuery query;
  int allowedSlop;

  List<Spans> subSpans; // in query order
  DocIdSetIterator conjunction; // use to move to next doc with all clauses
  boolean atFirstInCurrentDoc;
  boolean oneExhaustedInCurrentDoc; // no more results possible in current doc

  NearSpans(SpanNearQuery query, List<Spans> subSpans)
  throws IOException {
    this.query = Objects.requireNonNull(query);
    this.allowedSlop = query.getSlop();
    if (subSpans.size() < 2) {
      throw new IllegalArgumentException("Less than 2 subSpans: " + query);
    }
    this.subSpans = Objects.requireNonNull(subSpans); // in query order
    this.conjunction = ConjunctionDISI.intersect(subSpans);
  }

  @Override
  public int docID() {
    return conjunction.docID();
  }

  @Override
  public long cost() {
    return conjunction.cost();
  }

  @Override
  public int nextDoc() throws IOException {
    return (conjunction.nextDoc() == NO_MORE_DOCS)
        ? NO_MORE_DOCS
        : toMatchDoc();
  }

  @Override
  public int advance(int target) throws IOException {
    return (conjunction.advance(target) == NO_MORE_DOCS)
        ? NO_MORE_DOCS
        : toMatchDoc();
  }

  abstract int toMatchDoc() throws IOException;

  abstract boolean twoPhaseCurrentDocMatches() throws IOException;

  /**
   * Return a {@link TwoPhaseIterator} view of this {@link NearSpans}.
   */
  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    TwoPhaseIterator res = new TwoPhaseIterator(conjunction) {

      @Override
      public boolean matches() throws IOException {
        return twoPhaseCurrentDocMatches();
      }
    };
    return res;
  }

  private Spans[] subSpansArray = null; // init only when needed.

  public Spans[] getSubSpans() {
    if (subSpansArray == null) {
      subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
    }
    return subSpansArray;
  }

}

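To make the slop bookkeeping in the ordered case below concrete, here is a small worked example with illustrative numbers only. Suppose a three-clause ordered near query finds sub-span matches at positions [2,3), [5,6) and [6,8) in the same document. Only non-overlapping gaps count toward the slop, so matchSlop = (5 - 3) + (6 - 6) = 2, and the document is accepted exactly when the query's allowed slop is at least 2. This mirrors the matchSlop += (matchStart - prevEnd) accumulation in shrinkToAfterShortestMatch() further down.
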
@@ -17,24 +17,18 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
|
||||
* where the subspans do not overlap and have a maximum slop between them.
|
||||
* where the subspans do not overlap and have a maximum slop between them,
|
||||
* and that does not need to collect payloads.
|
||||
* To also collect payloads, see {@link NearSpansPayloadOrdered}.
|
||||
* <p>
|
||||
 * The formed spans only contain minimum slop matches.<br>
|
||||
* The matching slop is computed from the distance(s) between
|
||||
|
@@ -55,306 +49,196 @@ import java.util.Set;
|
|||
* Expert:
|
||||
* Only public for subclassing. Most implementations should not need this class
|
||||
*/
|
||||
public class NearSpansOrdered extends Spans {
|
||||
private final int allowedSlop;
|
||||
private boolean firstTime = true;
|
||||
private boolean more = false;
|
||||
public class NearSpansOrdered extends NearSpans {
|
||||
|
||||
/** The spans in the same order as the SpanNearQuery */
|
||||
private final Spans[] subSpans;
|
||||
protected int matchDoc = -1;
|
||||
protected int matchStart = -1;
|
||||
protected int matchEnd = -1;
|
||||
|
||||
/** Indicates that all subSpans have same doc() */
|
||||
private boolean inSameDoc = false;
|
||||
|
||||
private int matchDoc = -1;
|
||||
private int matchStart = -1;
|
||||
private int matchEnd = -1;
|
||||
private List<byte[]> matchPayload;
|
||||
|
||||
private final Spans[] subSpansByDoc;
|
||||
// Even though the array is probably almost sorted, InPlaceMergeSorter will likely
|
||||
// perform better since it has a lower overhead than TimSorter for small arrays
|
||||
private final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
ArrayUtil.swap(subSpansByDoc, i, j);
|
||||
}
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return subSpansByDoc[i].doc() - subSpansByDoc[j].doc();
|
||||
}
|
||||
};
|
||||
|
||||
private SpanNearQuery query;
|
||||
private boolean collectPayloads = true;
|
||||
|
||||
public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
this(spanNearQuery, context, acceptDocs, termContexts, true);
|
||||
public NearSpansOrdered(SpanNearQuery query, List<Spans> subSpans) throws IOException {
|
||||
super(query, subSpans);
|
||||
this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1
|
||||
}
|
||||
|
||||
public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
|
||||
throws IOException {
|
||||
if (spanNearQuery.getClauses().length < 2) {
|
||||
throw new IllegalArgumentException("Less than 2 clauses: "
|
||||
+ spanNearQuery);
|
||||
}
|
||||
this.collectPayloads = collectPayloads;
|
||||
allowedSlop = spanNearQuery.getSlop();
|
||||
SpanQuery[] clauses = spanNearQuery.getClauses();
|
||||
subSpans = new Spans[clauses.length];
|
||||
matchPayload = new LinkedList<>();
|
||||
subSpansByDoc = new Spans[clauses.length];
|
||||
for (int i = 0; i < clauses.length; i++) {
|
||||
subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
|
||||
subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
|
||||
}
|
||||
query = spanNearQuery; // kept for toString() only.
|
||||
}
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public int doc() { return matchDoc; }
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public int start() { return matchStart; }
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public int end() { return matchEnd; }
|
||||
|
||||
public Spans[] getSubSpans() {
|
||||
return subSpans;
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
// TODO: Would be nice to be able to lazy load payloads
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return matchPayload;
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
return matchPayload.isEmpty() == false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
long minCost = Long.MAX_VALUE;
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
minCost = Math.min(minCost, subSpans[i].cost());
|
||||
}
|
||||
return minCost;
|
||||
}
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (firstTime) {
|
||||
firstTime = false;
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
if (! subSpans[i].next()) {
|
||||
more = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
more = true;
|
||||
}
|
||||
if(collectPayloads) {
|
||||
matchPayload.clear();
|
||||
}
|
||||
return advanceAfterOrdered();
|
||||
}
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (firstTime) {
|
||||
firstTime = false;
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
if (! subSpans[i].skipTo(target)) {
|
||||
more = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
more = true;
|
||||
} else if (more && (subSpans[0].doc() < target)) {
|
||||
if (subSpans[0].skipTo(target)) {
|
||||
inSameDoc = false;
|
||||
} else {
|
||||
more = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if(collectPayloads) {
|
||||
matchPayload.clear();
|
||||
}
|
||||
return advanceAfterOrdered();
|
||||
}
|
||||
|
||||
/** Advances the subSpans to just after an ordered match with a minimum slop
|
||||
* that is smaller than the slop allowed by the SpanNearQuery.
|
||||
* @return true iff there is such a match.
|
||||
*/
|
||||
private boolean advanceAfterOrdered() throws IOException {
|
||||
while (more && (inSameDoc || toSameDoc())) {
|
||||
if (stretchToOrder() && shrinkToAfterShortestMatch()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false; // no more matches
|
||||
}
|
||||
|
||||
|
||||
/** Advance the subSpans to the same document */
|
||||
private boolean toSameDoc() throws IOException {
|
||||
sorter.sort(0, subSpansByDoc.length);
|
||||
int firstIndex = 0;
|
||||
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
|
||||
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
|
||||
if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
|
||||
more = false;
|
||||
inSameDoc = false;
|
||||
return false;
|
||||
}
|
||||
maxDoc = subSpansByDoc[firstIndex].doc();
|
||||
if (++firstIndex == subSpansByDoc.length) {
|
||||
firstIndex = 0;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < subSpansByDoc.length; i++) {
|
||||
assert (subSpansByDoc[i].doc() == maxDoc)
|
||||
: " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
|
||||
+ "\n at doc " + subSpansByDoc[i].doc()
|
||||
+ ", but should be at " + maxDoc;
|
||||
}
|
||||
inSameDoc = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Check whether two Spans in the same document are ordered and not overlapping.
|
||||
* @return false iff spans2's start position is smaller than spans1's end position
|
||||
*/
|
||||
static final boolean docSpansOrderedNonOverlap(Spans spans1, Spans spans2) {
|
||||
assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
|
||||
assert spans1.start() < spans1.end();
|
||||
assert spans2.start() < spans2.end();
|
||||
return spans1.end() <= spans2.start();
|
||||
}
|
||||
|
||||
/** Like {@link #docSpansOrderedNonOverlap(Spans,Spans)}, but use the spans
|
||||
* starts and ends as parameters.
|
||||
*/
|
||||
private static final boolean docSpansOrderedNonOverlap(int start1, int end1, int start2, int end2) {
|
||||
assert start1 < end1;
|
||||
assert start2 < end2;
|
||||
return end1 <= start2;
|
||||
}
|
||||
|
||||
/** Order the subSpans within the same document by advancing all later spans
|
||||
* after the previous one.
|
||||
*/
|
||||
private boolean stretchToOrder() throws IOException {
|
||||
matchDoc = subSpans[0].doc();
|
||||
for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
|
||||
while (! docSpansOrderedNonOverlap(subSpans[i-1], subSpans[i])) {
|
||||
if (! subSpans[i].next()) {
|
||||
inSameDoc = false;
|
||||
more = false;
|
||||
break;
|
||||
} else if (matchDoc != subSpans[i].doc()) {
|
||||
inSameDoc = false;
|
||||
break;
|
||||
@Override
|
||||
int toMatchDoc() throws IOException {
|
||||
subSpansToFirstStartPosition();
|
||||
while (true) {
|
||||
if (! stretchToOrder()) {
|
||||
if (conjunction.nextDoc() == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
subSpansToFirstStartPosition();
|
||||
} else {
|
||||
if (shrinkToAfterShortestMatch()) {
|
||||
atFirstInCurrentDoc = true;
|
||||
return conjunction.docID();
|
||||
}
|
||||
// not a match, after shortest ordered spans, not at beginning of doc.
|
||||
if (oneExhaustedInCurrentDoc) {
|
||||
if (conjunction.nextDoc() == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
subSpansToFirstStartPosition();
|
||||
}
|
||||
}
|
||||
}
|
||||
return inSameDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||
subSpansToFirstStartPosition();
|
||||
while (true) {
|
||||
if (! stretchToOrder()) {
|
||||
return false;
|
||||
}
|
||||
if (shrinkToAfterShortestMatch()) {
|
||||
atFirstInCurrentDoc = true;
|
||||
return true;
|
||||
}
|
||||
// not a match, after shortest ordered spans
|
||||
if (oneExhaustedInCurrentDoc) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
if (atFirstInCurrentDoc) {
|
||||
atFirstInCurrentDoc = false;
|
||||
return matchStart;
|
||||
}
|
||||
while (true) {
|
||||
if (oneExhaustedInCurrentDoc) {
|
||||
matchStart = NO_MORE_POSITIONS;
|
||||
matchEnd = NO_MORE_POSITIONS;
|
||||
return NO_MORE_POSITIONS;
|
||||
}
|
||||
if (! stretchToOrder()) {
|
||||
matchStart = NO_MORE_POSITIONS;
|
||||
matchEnd = NO_MORE_POSITIONS;
|
||||
return NO_MORE_POSITIONS;
|
||||
}
|
||||
if (shrinkToAfterShortestMatch()) { // may also leave oneExhaustedInCurrentDoc
|
||||
return matchStart;
|
||||
}
|
||||
// after shortest ordered spans, or oneExhaustedInCurrentDoc
|
||||
}
|
||||
}
|
||||
|
||||
private void subSpansToFirstStartPosition() throws IOException {
|
||||
for (Spans spans : subSpans) {
|
||||
assert spans.startPosition() == -1 : "spans="+spans;
|
||||
spans.nextStartPosition();
|
||||
assert spans.startPosition() != NO_MORE_POSITIONS;
|
||||
}
|
||||
oneExhaustedInCurrentDoc = false;
|
||||
}
|
||||
|
||||
/** Order the subSpans within the same document by using nextStartPosition on all subSpans
|
||||
* after the first as little as necessary.
|
||||
* Return true when the subSpans could be ordered in this way,
|
||||
* otherwise at least one is exhausted in the current doc.
|
||||
*/
|
||||
private boolean stretchToOrder() throws IOException {
|
||||
Spans prevSpans = subSpans.get(0);
|
||||
assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
|
||||
assert prevSpans.endPosition() != NO_MORE_POSITIONS;
|
||||
for (int i = 1; i < subSpans.size(); i++) {
|
||||
Spans spans = subSpans.get(i);
|
||||
assert spans.startPosition() != NO_MORE_POSITIONS;
|
||||
assert spans.endPosition() != NO_MORE_POSITIONS;
|
||||
|
||||
while (prevSpans.endPosition() > spans.startPosition()) { // while overlapping spans
|
||||
if (spans.nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
prevSpans = spans;
|
||||
}
|
||||
return true; // all subSpans ordered and non overlapping
|
||||
}
|
||||
|
||||
/** The subSpans are ordered in the same doc, so there is a possible match.
|
||||
* Compute the slop while making the match as short as possible by advancing
|
||||
* all subSpans except the last one in reverse order.
|
||||
* Compute the slop while making the match as short as possible by using nextStartPosition
|
||||
* on all subSpans, except the last one, in reverse order.
|
||||
*/
|
||||
private boolean shrinkToAfterShortestMatch() throws IOException {
|
||||
matchStart = subSpans[subSpans.length - 1].start();
|
||||
matchEnd = subSpans[subSpans.length - 1].end();
|
||||
Set<byte[]> possibleMatchPayloads = new HashSet<>();
|
||||
if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
|
||||
possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
|
||||
}
|
||||
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
||||
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
|
||||
matchStart = lastSubSpans.startPosition();
|
||||
matchEnd = lastSubSpans.endPosition();
|
||||
|
||||
Collection<byte[]> possiblePayload = null;
|
||||
|
||||
int matchSlop = 0;
|
||||
int lastStart = matchStart;
|
||||
int lastEnd = matchEnd;
|
||||
for (int i = subSpans.length - 2; i >= 0; i--) {
|
||||
Spans prevSpans = subSpans[i];
|
||||
if (collectPayloads && prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
|
||||
int prevStart = prevSpans.start();
|
||||
int prevEnd = prevSpans.end();
|
||||
while (true) { // Advance prevSpans until after (lastStart, lastEnd)
|
||||
if (! prevSpans.next()) {
|
||||
inSameDoc = false;
|
||||
more = false;
|
||||
break; // Check remaining subSpans for final match.
|
||||
} else if (matchDoc != prevSpans.doc()) {
|
||||
inSameDoc = false; // The last subSpans is not advanced here.
|
||||
break; // Check remaining subSpans for last match in this document.
|
||||
} else {
|
||||
int ppStart = prevSpans.start();
|
||||
int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
|
||||
if (! docSpansOrderedNonOverlap(ppStart, ppEnd, lastStart, lastEnd)) {
|
||||
break; // Check remaining subSpans.
|
||||
} else { // prevSpans still before (lastStart, lastEnd)
|
||||
prevStart = ppStart;
|
||||
prevEnd = ppEnd;
|
||||
if (collectPayloads && prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
}
|
||||
for (int i = subSpans.size() - 2; i >= 0; i--) {
|
||||
Spans prevSpans = subSpans.get(i);
|
||||
|
||||
int prevStart = prevSpans.startPosition();
|
||||
int prevEnd = prevSpans.endPosition();
|
||||
while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
|
||||
if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
oneExhaustedInCurrentDoc = true;
|
||||
break; // Check remaining subSpans for match.
|
||||
}
|
||||
int ppStart = prevSpans.startPosition();
|
||||
int ppEnd = prevSpans.endPosition();
|
||||
if (ppEnd > lastStart) { // if overlapping spans
|
||||
break; // Check remaining subSpans.
|
||||
}
|
||||
// prevSpans still before (lastStart, lastEnd)
|
||||
prevStart = ppStart;
|
||||
prevEnd = ppEnd;
|
||||
}
|
||||
|
||||
if (collectPayloads && possiblePayload != null) {
|
||||
possibleMatchPayloads.addAll(possiblePayload);
|
||||
}
|
||||
|
||||
assert prevStart <= matchStart;
|
||||
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
|
||||
matchSlop += (matchStart - prevEnd);
|
||||
}
|
||||
|
||||
/* Do not break on (matchSlop > allowedSlop) here to make sure
|
||||
* that subSpans[0] is advanced after the match, if any.
|
||||
* that on return the first subSpans has nextStartPosition called.
|
||||
*/
|
||||
matchStart = prevStart;
|
||||
lastStart = prevStart;
|
||||
lastEnd = prevEnd;
|
||||
}
|
||||
|
||||
|
||||
boolean match = matchSlop <= allowedSlop;
|
||||
|
||||
if(collectPayloads && match && possibleMatchPayloads.size() > 0) {
|
||||
matchPayload.addAll(possibleMatchPayloads);
|
||||
}
|
||||
|
||||
return match; // ordered and allowed slop
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return atFirstInCurrentDoc ? -1 : matchStart;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return atFirstInCurrentDoc ? -1 : matchEnd;
|
||||
}
|
||||
|
||||
/** Throws an UnsupportedOperationException */
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
|
||||
}
|
||||
|
||||
/** Throws an UnsupportedOperationException */
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getName() + "("+query.toString()+")@"+
|
||||
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
|
||||
return "NearSpansOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -0,0 +1,146 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
|
||||
/** A {@link NearSpansOrdered} that allows collecting payloads.
|
||||
* Expert:
|
||||
* Only public for subclassing. Most implementations should not need this class
|
||||
*/
|
||||
public class NearSpansPayloadOrdered extends NearSpansOrdered {
|
||||
|
||||
private List<byte[]> matchPayload;
|
||||
private Set<byte[]> possibleMatchPayloads;
|
||||
|
||||
public NearSpansPayloadOrdered(SpanNearQuery query, List<Spans> subSpans)
|
||||
throws IOException {
|
||||
super(query, subSpans);
|
||||
this.matchPayload = new LinkedList<>();
|
||||
this.possibleMatchPayloads = new HashSet<>();
|
||||
}
|
||||
|
||||
/** The subSpans are ordered in the same doc, so there is a possible match.
|
||||
* Compute the slop while making the match as short as possible by using nextStartPosition
|
||||
* on all subSpans, except the last one, in reverse order.
|
||||
* Also collect the payloads.
|
||||
*/
|
||||
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
||||
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
|
||||
matchStart = lastSubSpans.startPosition();
|
||||
matchEnd = lastSubSpans.endPosition();
|
||||
|
||||
matchPayload.clear();
|
||||
possibleMatchPayloads.clear();
|
||||
|
||||
if (lastSubSpans.isPayloadAvailable()) {
|
||||
possibleMatchPayloads.addAll(lastSubSpans.getPayload());
|
||||
}
|
||||
|
||||
Collection<byte[]> possiblePayload = null;
|
||||
|
||||
int matchSlop = 0;
|
||||
int lastStart = matchStart;
|
||||
int lastEnd = matchEnd;
|
||||
for (int i = subSpans.size() - 2; i >= 0; i--) {
|
||||
Spans prevSpans = subSpans.get(i);
|
||||
|
||||
if (prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
|
||||
int prevStart = prevSpans.startPosition();
|
||||
int prevEnd = prevSpans.endPosition();
|
||||
while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
|
||||
if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
oneExhaustedInCurrentDoc = true;
|
||||
break; // Check remaining subSpans for match.
|
||||
}
|
||||
int ppStart = prevSpans.startPosition();
|
||||
int ppEnd = prevSpans.endPosition();
|
||||
if (ppEnd > lastStart) { // if overlapping spans
|
||||
break; // Check remaining subSpans.
|
||||
}
|
||||
// prevSpans still before (lastStart, lastEnd)
|
||||
prevStart = ppStart;
|
||||
prevEnd = ppEnd;
|
||||
if (prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
if (possiblePayload == null) {
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
} else {
|
||||
possiblePayload.clear();
|
||||
}
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
}
|
||||
|
||||
if (possiblePayload != null) {
|
||||
possibleMatchPayloads.addAll(possiblePayload);
|
||||
}
|
||||
|
||||
assert prevStart <= matchStart;
|
||||
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
|
||||
matchSlop += (matchStart - prevEnd);
|
||||
}
|
||||
|
||||
/* Do not break on (matchSlop > allowedSlop) here to make sure
|
||||
* that on return the first subSpans has nextStartPosition called.
|
||||
*/
|
||||
matchStart = prevStart;
|
||||
lastStart = prevStart;
|
||||
lastEnd = prevEnd;
|
||||
}
|
||||
|
||||
boolean match = matchSlop <= allowedSlop;
|
||||
|
||||
if (match && possibleMatchPayloads.size() > 0) {
|
||||
matchPayload.addAll(possibleMatchPayloads);
|
||||
}
|
||||
|
||||
return match; // ordered and allowed slop
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
// TODO: Would be nice to be able to lazy load payloads
|
||||
/** Return payloads when available. */
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return matchPayload;
|
||||
}
|
||||
|
||||
/** Indicates whether payloads are available */
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
return ! matchPayload.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NearSpansPayloadOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
|
||||
}
|
||||
}
|
||||
|
|
@@ -17,253 +17,225 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
/**
|
||||
* Similar to {@link NearSpansOrdered}, but for the unordered case.
|
||||
*
|
||||
*
|
||||
* Expert:
|
||||
* Only public for subclassing. Most implementations should not need this class
|
||||
*/
|
||||
public class NearSpansUnordered extends Spans {
|
||||
private SpanNearQuery query;
|
||||
public class NearSpansUnordered extends NearSpans {
|
||||
|
||||
private List<SpansCell> ordered = new ArrayList<>(); // spans in query order
|
||||
private Spans[] subSpans;
|
||||
private int slop; // from query
|
||||
private List<SpansCell> subSpanCells; // in query order
|
||||
|
||||
private SpansCell first; // linked list of spans
|
||||
private SpansCell last; // sorted by doc only
|
||||
private SpanPositionQueue spanPositionQueue;
|
||||
|
||||
private int totalLength; // sum of current lengths
|
||||
public NearSpansUnordered(SpanNearQuery query, List<Spans> subSpans)
|
||||
throws IOException {
|
||||
super(query, subSpans);
|
||||
|
||||
private CellQueue queue; // sorted queue of spans
|
||||
private SpansCell max; // max element in queue
|
||||
this.subSpanCells = new ArrayList<>(subSpans.size());
|
||||
for (Spans subSpan : subSpans) { // sub spans in query order
|
||||
this.subSpanCells.add(new SpansCell(subSpan));
|
||||
}
|
||||
spanPositionQueue = new SpanPositionQueue(subSpans.size());
|
||||
singleCellToPositionQueue(); // -1 startPosition/endPosition also at doc -1
|
||||
}
|
||||
|
||||
private boolean more = true; // true iff not done
|
||||
private boolean firstTime = true; // true before first next()
|
||||
private void singleCellToPositionQueue() {
|
||||
maxEndPositionCell = subSpanCells.get(0);
|
||||
assert maxEndPositionCell.docID() == -1;
|
||||
assert maxEndPositionCell.startPosition() == -1;
|
||||
spanPositionQueue.add(maxEndPositionCell);
|
||||
}
|
||||
|
||||
private class CellQueue extends PriorityQueue<SpansCell> {
|
||||
public CellQueue(int size) {
|
||||
private void subSpanCellsToPositionQueue() throws IOException { // used when all subSpanCells arrived at the same doc.
|
||||
spanPositionQueue.clear();
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
assert cell.startPosition() == -1;
|
||||
cell.nextStartPosition();
|
||||
assert cell.startPosition() != NO_MORE_POSITIONS;
|
||||
spanPositionQueue.add(cell);
|
||||
}
|
||||
}
|
||||
|
||||
/** SpansCell wraps a sub Spans to maintain totalSpanLength and maxEndPositionCell */
|
||||
private int totalSpanLength;
|
||||
private SpansCell maxEndPositionCell;
|
||||
|
||||
private class SpansCell extends FilterSpans {
|
||||
private int spanLength = -1;
|
||||
|
||||
public SpansCell(Spans spans) {
|
||||
super(spans);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
int res = in.nextStartPosition();
|
||||
if (res != NO_MORE_POSITIONS) {
|
||||
adjustLength();
|
||||
}
|
||||
adjustMax(); // also after last end position in current doc.
|
||||
return res;
|
||||
}
|
||||
|
||||
private void adjustLength() {
|
||||
if (spanLength != -1) {
|
||||
totalSpanLength -= spanLength; // subtract old, possibly from a previous doc
|
||||
}
|
||||
assert in.startPosition() != NO_MORE_POSITIONS;
|
||||
spanLength = endPosition() - startPosition();
|
||||
assert spanLength >= 0;
|
||||
totalSpanLength += spanLength; // add new
|
||||
}
|
||||
|
||||
private void adjustMax() {
|
||||
assert docID() == maxEndPositionCell.docID();
|
||||
if (endPosition() > maxEndPositionCell.endPosition()) {
|
||||
maxEndPositionCell = this;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NearSpansUnordered.SpansCell(" + in.toString() + ")";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class SpanPositionQueue extends PriorityQueue<SpansCell> {
|
||||
public SpanPositionQueue(int size) {
|
||||
super(size);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected final boolean lessThan(SpansCell spans1, SpansCell spans2) {
|
||||
if (spans1.doc() == spans2.doc()) {
|
||||
return docSpansOrdered(spans1, spans2);
|
||||
} else {
|
||||
return spans1.doc() < spans2.doc();
|
||||
}
|
||||
return positionsOrdered(spans1, spans2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Wraps a Spans, and can be used to form a linked list. */
|
||||
private class SpansCell extends Spans {
|
||||
private Spans spans;
|
||||
private SpansCell next;
|
||||
private int length = -1;
|
||||
private int index;
|
||||
|
||||
public SpansCell(Spans spans, int index) {
|
||||
this.spans = spans;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
return adjust(spans.next());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
return adjust(spans.skipTo(target));
|
||||
}
|
||||
|
||||
private boolean adjust(boolean condition) {
|
||||
if (length != -1) {
|
||||
totalLength -= length; // subtract old length
|
||||
}
|
||||
if (condition) {
|
||||
length = end() - start();
|
||||
totalLength += length; // add new length
|
||||
|
||||
if (max == null || doc() > max.doc()
|
||||
|| (doc() == max.doc()) && (end() > max.end())) {
|
||||
max = this;
|
||||
}
|
||||
}
|
||||
more = condition;
|
||||
return condition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return spans.doc(); }
|
||||
|
||||
@Override
|
||||
public int start() { return spans.start(); }
|
||||
|
||||
@Override
|
||||
public int end() { return spans.end(); }
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return new ArrayList<>(spans.getPayload());
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return spans.isPayloadAvailable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return spans.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() { return spans.toString() + "#" + index; }
|
||||
}
|
||||
|
||||
|
||||
public NearSpansUnordered(SpanNearQuery query, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts)
|
||||
throws IOException {
|
||||
this.query = query;
|
||||
this.slop = query.getSlop();
|
||||
|
||||
SpanQuery[] clauses = query.getClauses();
|
||||
queue = new CellQueue(clauses.length);
|
||||
subSpans = new Spans[clauses.length];
|
||||
for (int i = 0; i < clauses.length; i++) {
|
||||
SpansCell cell =
|
||||
new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i);
|
||||
ordered.add(cell);
|
||||
subSpans[i] = cell.spans;
|
||||
}
|
||||
}
|
||||
public Spans[] getSubSpans() {
|
||||
return subSpans;
|
||||
}
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (firstTime) {
|
||||
initList(true);
|
||||
listToQueue(); // initialize queue
|
||||
firstTime = false;
|
||||
} else if (more) {
|
||||
if (min().next()) { // trigger further scanning
|
||||
queue.updateTop(); // maintain queue
|
||||
} else {
|
||||
more = false;
|
||||
}
|
||||
}
|
||||
|
||||
while (more) {
|
||||
|
||||
boolean queueStale = false;
|
||||
|
||||
if (min().doc() != max.doc()) { // maintain list
|
||||
queueToList();
|
||||
queueStale = true;
|
||||
}
|
||||
|
||||
// skip to doc w/ all clauses
|
||||
|
||||
while (more && first.doc() < last.doc()) {
|
||||
more = first.skipTo(last.doc()); // skip first upto last
|
||||
firstToLast(); // and move it to the end
|
||||
queueStale = true;
|
||||
}
|
||||
|
||||
if (!more) return false;
|
||||
|
||||
// found doc w/ all clauses
|
||||
|
||||
if (queueStale) { // maintain the queue
|
||||
listToQueue();
|
||||
queueStale = false;
|
||||
}
|
||||
|
||||
if (atMatch()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
more = min().next();
|
||||
if (more) {
|
||||
queue.updateTop(); // maintain queue
|
||||
}
|
||||
}
|
||||
return false; // no more matches
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (firstTime) { // initialize
|
||||
initList(false);
|
||||
for (SpansCell cell = first; more && cell!=null; cell=cell.next) {
|
||||
more = cell.skipTo(target); // skip all
|
||||
}
|
||||
if (more) {
|
||||
listToQueue();
|
||||
}
|
||||
firstTime = false;
|
||||
} else { // normal case
|
||||
while (more && min().doc() < target) { // skip as needed
|
||||
if (min().skipTo(target)) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
more = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return more && (atMatch() || next());
|
||||
}
|
||||
|
||||
/** Check whether two Spans in the same document are ordered with possible overlap.
|
||||
* @return true iff spans1 starts before spans2
|
||||
* or the spans start at the same position,
|
||||
* and spans1 ends before spans2.
|
||||
*/
|
||||
static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
|
||||
assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
|
||||
int start1 = spans1.start();
|
||||
int start2 = spans2.start();
|
||||
return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
|
||||
static final boolean positionsOrdered(Spans spans1, Spans spans2) {
|
||||
assert spans1.docID() == spans2.docID() : "doc1 " + spans1.docID() + " != doc2 " + spans2.docID();
|
||||
int start1 = spans1.startPosition();
|
||||
int start2 = spans2.startPosition();
|
||||
return (start1 == start2) ? (spans1.endPosition() < spans2.endPosition()) : (start1 < start2);
|
||||
}
|
||||
|
||||
private SpansCell min() { return queue.top(); }
|
||||
private SpansCell minPositionCell() {
|
||||
return spanPositionQueue.top();
|
||||
}
|
||||
|
||||
private boolean atMatch() {
|
||||
assert minPositionCell().docID() == maxEndPositionCell.docID();
|
||||
return (maxEndPositionCell.endPosition() - minPositionCell().startPosition() - totalSpanLength) <= allowedSlop;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return min().doc(); }
|
||||
@Override
|
||||
public int start() { return min().start(); }
|
||||
@Override
|
||||
public int end() { return max.end(); }
|
||||
int toMatchDoc() throws IOException {
|
||||
// at doc with all subSpans
|
||||
subSpanCellsToPositionQueue();
|
||||
while (true) {
|
||||
if (atMatch()) {
|
||||
atFirstInCurrentDoc = true;
|
||||
oneExhaustedInCurrentDoc = false;
|
||||
return conjunction.docID();
|
||||
}
|
||||
assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
|
||||
if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) {
|
||||
spanPositionQueue.updateTop();
|
||||
}
|
||||
else { // exhausted a subSpan in current doc
|
||||
if (conjunction.nextDoc() == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
// at doc with all subSpans
|
||||
subSpanCellsToPositionQueue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||
// at doc with all subSpans
|
||||
subSpanCellsToPositionQueue();
|
||||
while (true) {
|
||||
if (atMatch()) {
|
||||
atFirstInCurrentDoc = true;
|
||||
oneExhaustedInCurrentDoc = false;
|
||||
return true;
|
||||
}
|
||||
assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
|
||||
if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) {
|
||||
spanPositionQueue.updateTop();
|
||||
}
|
||||
else { // exhausted a subSpan in current doc
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
if (atFirstInCurrentDoc) {
|
||||
atFirstInCurrentDoc = false;
|
||||
return minPositionCell().startPosition();
|
||||
}
|
||||
while (minPositionCell().startPosition() == -1) { // initially at current doc
|
||||
minPositionCell().nextStartPosition();
|
||||
spanPositionQueue.updateTop();
|
||||
}
|
||||
assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
|
||||
while (true) {
|
||||
if (minPositionCell().nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
oneExhaustedInCurrentDoc = true;
|
||||
return NO_MORE_POSITIONS;
|
||||
}
|
||||
spanPositionQueue.updateTop();
|
||||
if (atMatch()) {
|
||||
return minPositionCell().startPosition();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
assert minPositionCell() != null;
|
||||
return atFirstInCurrentDoc ? -1
|
||||
: oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS
|
||||
: minPositionCell().startPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return atFirstInCurrentDoc ? -1
|
||||
: oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS
|
||||
: maxEndPositionCell.endPosition();
|
||||
}
|
||||
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
/**
|
||||
* WARNING: The List is not necessarily in order of the the positions
|
||||
* WARNING: The List is not necessarily in order of the positions.
|
||||
* @return Collection of <code>byte[]</code> payloads
|
||||
* @throws IOException if there is a low-level I/O error
|
||||
*/
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
Set<byte[]> matchPayload = new HashSet<>();
|
||||
for (SpansCell cell = first; cell != null; cell = cell.next) {
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
if (cell.isPayloadAvailable()) {
|
||||
matchPayload.addAll(cell.getPayload());
|
||||
}
|
||||
|
@ -271,78 +243,23 @@ public class NearSpansUnordered extends Spans {
|
|||
return matchPayload;
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
SpansCell pointer = min();
|
||||
while (pointer != null) {
|
||||
if (pointer.isPayloadAvailable()) {
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
if (cell.isPayloadAvailable()) {
|
||||
return true;
|
||||
}
|
||||
pointer = pointer.next;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
long minCost = Long.MAX_VALUE;
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
minCost = Math.min(minCost, subSpans[i].cost());
|
||||
}
|
||||
return minCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getName() + "("+query.toString()+")@"+
|
||||
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
|
||||
}
|
||||
|
||||
private void initList(boolean next) throws IOException {
|
||||
for (int i = 0; more && i < ordered.size(); i++) {
|
||||
SpansCell cell = ordered.get(i);
|
||||
if (next)
|
||||
more = cell.next(); // move to first entry
|
||||
if (more) {
|
||||
addToList(cell); // add to list
|
||||
}
|
||||
if (minPositionCell() != null) {
|
||||
return getClass().getName() + "("+query.toString()+")@"+
|
||||
(docID()+":"+startPosition()+"-"+endPosition());
|
||||
} else {
|
||||
return getClass().getName() + "("+query.toString()+")@ ?START?";
|
||||
}
|
||||
}
|
||||
|
||||
private void addToList(SpansCell cell) {
|
||||
if (last != null) { // add next to end of list
|
||||
last.next = cell;
|
||||
} else
|
||||
first = cell;
|
||||
last = cell;
|
||||
cell.next = null;
|
||||
}
|
||||
|
||||
private void firstToLast() {
|
||||
last.next = first; // move first to end of list
|
||||
last = first;
|
||||
first = first.next;
|
||||
last.next = null;
|
||||
}
|
||||
|
||||
private void queueToList() {
|
||||
last = first = null;
|
||||
while (queue.top() != null) {
|
||||
addToList(queue.pop());
|
||||
}
|
||||
}
|
||||
|
||||
private void listToQueue() {
|
||||
queue.clear(); // rebuild queue
|
||||
for (SpansCell cell = first; cell != null; cell = cell.next) {
|
||||
queue.add(cell); // add to queue from list
|
||||
}
|
||||
}
|
||||
|
||||
private boolean atMatch() {
|
||||
return (min().doc() == max.doc())
|
||||
&& ((max.end() - min().start() - totalLength) <= slop);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,9 +21,9 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Matches spans near the beginning of a field.
|
||||
* <p>
|
||||
* This class is a simple extension of {@link SpanPositionRangeQuery} in that it assumes the
|
||||
* start to be zero and only checks the end boundary.
|
||||
*/
|
||||
|
@ -37,10 +37,10 @@ public class SpanFirstQuery extends SpanPositionRangeQuery {
|
|||
|
||||
@Override
|
||||
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
|
||||
assert spans.start() != spans.end() : "start equals end: " + spans.start();
|
||||
if (spans.start() >= end)
|
||||
return AcceptStatus.NO_AND_ADVANCE;
|
||||
else if (spans.end() <= end)
|
||||
assert spans.startPosition() != spans.endPosition() : "start equals end: " + spans.startPosition();
|
||||
if (spans.startPosition() >= end)
|
||||
return AcceptStatus.NO_MORE_IN_CURRENT_DOC;
|
||||
else if (spans.endPosition() <= end)
|
||||
return AcceptStatus.YES;
|
||||
else
|
||||
return AcceptStatus.NO;
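A minimal usage sketch for SpanFirstQuery; the field and term names are illustrative, not from this commit:

    // Match "lucene" only when the span ends at or before position 3 of the "body" field.
    SpanQuery term = new SpanTermQuery(new Term("body", "lucene"));
    SpanFirstQuery first = new SpanFirstQuery(term, 3);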
|
||||
|
|
|
@ -105,7 +105,7 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = match.hashCode();
|
||||
int h = match.hashCode() ^ getClass().hashCode();
|
||||
h ^= (h << 8) | (h >>> 25); // reversible
|
||||
//TODO: is this right?
|
||||
h ^= payloadToMatch.hashCode();
|
||||
|
|
|
@ -37,7 +37,8 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
|
||||
/** Matches spans which are near one another. One can specify <i>slop</i>, the
|
||||
* maximum number of intervening unmatched positions, as well as whether
|
||||
* matches are required to be in-order. */
|
||||
* matches are required to be in-order.
|
||||
*/
|
||||
public class SpanNearQuery extends SpanQuery implements Cloneable {
|
||||
protected List<SpanQuery> clauses;
|
||||
protected int slop;
|
||||
|
@ -53,22 +54,19 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
* must be in the same order as in <code>clauses</code> and must be non-overlapping.
|
||||
* <br>When <code>inOrder</code> is false, the spans from each clause
|
||||
* need not be ordered and may overlap.
|
||||
* @param clauses the clauses to find near each other
|
||||
* @param clauses the clauses to find near each other, in the same field, at least 2.
|
||||
* @param slop The slop value
|
||||
* @param inOrder true if order is important
|
||||
*/
|
||||
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
|
||||
this(clauses, slop, inOrder, true);
|
||||
}
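A hedged usage sketch for the constructor above; the field and terms are illustrative:

    SpanQuery[] clauses = new SpanQuery[] {
        new SpanTermQuery(new Term("body", "apache")),
        new SpanTermQuery(new Term("body", "lucene"))
    };
    // "apache" followed by "lucene", with at most 5 unmatched positions in between.
    SpanNearQuery near = new SpanNearQuery(clauses, 5, true);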
|
||||
|
||||
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder, boolean collectPayloads) {
|
||||
|
||||
// copy clauses array into an ArrayList
|
||||
this.clauses = new ArrayList<>(clauses.length);
|
||||
for (int i = 0; i < clauses.length; i++) {
|
||||
SpanQuery clause = clauses[i];
|
||||
if (field == null) { // check field
|
||||
field = clause.getField();
|
||||
public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder, boolean collectPayloads) {
|
||||
this.clauses = new ArrayList<>(clausesIn.length);
|
||||
for (SpanQuery clause : clausesIn) {
|
||||
if (this.field == null) { // check field
|
||||
this.field = clause.getField();
|
||||
} else if (clause.getField() != null && !clause.getField().equals(field)) {
|
||||
throw new IllegalArgumentException("Clauses must have same field.");
|
||||
}
|
||||
|
@ -92,14 +90,13 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
public String getField() { return field; }
|
||||
|
||||
|
||||
@Override
|
||||
public void extractTerms(Set<Term> terms) {
|
||||
for (final SpanQuery clause : clauses) {
|
||||
clause.extractTerms(terms);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
|
@ -124,15 +121,21 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
if (clauses.size() == 0) // optimize 0-clause case
|
||||
return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
|
||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||
|
||||
if (clauses.size() == 1) // optimize 1-clause case
|
||||
return clauses.get(0).getSpans(context, acceptDocs, termContexts);
|
||||
|
||||
return inOrder
|
||||
? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
|
||||
: (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
|
||||
for (SpanQuery seq : clauses) {
|
||||
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
|
||||
if (subSpan != null) {
|
||||
subSpans.add(subSpan);
|
||||
} else {
|
||||
return null; // all required
|
||||
}
|
||||
}
|
||||
|
||||
// all NearSpans require at least two subSpans
|
||||
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
|
||||
: collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
|
||||
: new NearSpansOrdered(this, subSpans);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -148,12 +151,12 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
}
|
||||
if (clone != null) {
|
||||
return clone; // some clauses rewrote
|
||||
} else {
|
||||
return this; // no clauses rewrote
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SpanNearQuery clone() {
|
||||
int sz = clauses.size();
|
||||
|
|
|
@ -30,9 +30,11 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
/** Removes matches which overlap with another SpanQuery or
|
||||
* within a x tokens before or y tokens after another SpanQuery. */
|
||||
/** Removes matches which overlap with another SpanQuery or which are
|
||||
* within x tokens before or y tokens after another SpanQuery.
|
||||
*/
|
||||
public class SpanNotQuery extends SpanQuery implements Cloneable {
|
||||
private SpanQuery include;
|
||||
private SpanQuery exclude;
|
||||
|
@ -45,20 +47,20 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
this(include, exclude, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
||||
* have no overlap with spans from <code>exclude</code> within
|
||||
* <code>dist</code> tokens of <code>include</code>. */
|
||||
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
|
||||
this(include, exclude, dist, dist);
|
||||
}
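A minimal usage sketch of the symmetric-distance form; the field and terms are illustrative:

    SpanQuery include = new SpanTermQuery(new Term("body", "lucene"));
    SpanQuery exclude = new SpanTermQuery(new Term("body", "solr"));
    // Keep "lucene" matches unless "solr" occurs within 2 tokens before or after.
    SpanNotQuery not = new SpanNotQuery(include, exclude, 2);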
|
||||
|
||||
|
||||
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
||||
* have no overlap with spans from <code>exclude</code> within
|
||||
* <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */
|
||||
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
|
||||
this.include = include;
|
||||
this.exclude = exclude;
|
||||
this.include = Objects.requireNonNull(include);
|
||||
this.exclude = Objects.requireNonNull(exclude);
|
||||
this.pre = (pre >=0) ? pre : 0;
|
||||
this.post = (post >= 0) ? post : 0;
|
||||
|
||||
|
@ -96,81 +98,153 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
public SpanNotQuery clone() {
|
||||
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
|
||||
(SpanQuery) exclude.clone(), pre, post);
|
||||
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery) include.clone(),
|
||||
(SpanQuery) exclude.clone(), pre, post);
|
||||
spanNotQuery.setBoost(getBoost());
|
||||
return spanNotQuery;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||
Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
|
||||
if (includeSpans == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
|
||||
if (excludeSpans == null) {
|
||||
return includeSpans;
|
||||
}
|
||||
|
||||
return new Spans() {
|
||||
private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
|
||||
private boolean moreInclude = true;
|
||||
private int includeStart = -1;
|
||||
private int includeEnd = -1;
|
||||
private boolean atFirstInCurrentDoc = false;
|
||||
|
||||
private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
|
||||
private boolean moreExclude = excludeSpans.next();
|
||||
private boolean moreExclude = excludeSpans.nextDoc() != NO_MORE_DOCS;
|
||||
private int excludeStart = moreExclude ? excludeSpans.nextStartPosition() : NO_MORE_POSITIONS;
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (moreInclude) // move to next include
|
||||
moreInclude = includeSpans.next();
|
||||
|
||||
while (moreInclude && moreExclude) {
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (moreInclude) {
|
||||
moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS;
|
||||
if (moreInclude) {
|
||||
atFirstInCurrentDoc = true;
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
}
|
||||
}
|
||||
toNextIncluded();
|
||||
int res = moreInclude ? includeSpans.docID() : NO_MORE_DOCS;
|
||||
return res;
|
||||
}
|
||||
|
||||
if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
|
||||
moreExclude = excludeSpans.skipTo(includeSpans.doc());
|
||||
|
||||
while (moreExclude // while exclude is before
|
||||
&& includeSpans.doc() == excludeSpans.doc()
|
||||
&& excludeSpans.end() <= includeSpans.start() - pre) {
|
||||
moreExclude = excludeSpans.next(); // increment exclude
|
||||
private void toNextIncluded() throws IOException {
|
||||
while (moreInclude && moreExclude) {
|
||||
if (includeSpans.docID() > excludeSpans.docID()) {
|
||||
moreExclude = excludeSpans.advance(includeSpans.docID()) != NO_MORE_DOCS;
|
||||
if (moreExclude) {
|
||||
excludeStart = -1; // only use exclude positions at same doc
|
||||
}
|
||||
|
||||
if (!moreExclude // if no intersection
|
||||
|| includeSpans.doc() != excludeSpans.doc()
|
||||
|| includeSpans.end()+post <= excludeSpans.start())
|
||||
break; // we found a match
|
||||
|
||||
moreInclude = includeSpans.next(); // intersected: keep scanning
|
||||
}
|
||||
return moreInclude;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (moreInclude) // skip include
|
||||
moreInclude = includeSpans.skipTo(target);
|
||||
|
||||
if (!moreInclude)
|
||||
return false;
|
||||
|
||||
if (moreExclude // skip exclude
|
||||
&& includeSpans.doc() > excludeSpans.doc())
|
||||
moreExclude = excludeSpans.skipTo(includeSpans.doc());
|
||||
|
||||
while (moreExclude // while exclude is before
|
||||
&& includeSpans.doc() == excludeSpans.doc()
|
||||
&& excludeSpans.end() <= includeSpans.start()-pre) {
|
||||
moreExclude = excludeSpans.next(); // increment exclude
|
||||
if (excludeForwardInCurrentDocAndAtMatch()) {
|
||||
break; // at match.
|
||||
}
|
||||
|
||||
if (!moreExclude // if no intersection
|
||||
|| includeSpans.doc() != excludeSpans.doc()
|
||||
|| includeSpans.end()+post <= excludeSpans.start())
|
||||
return true; // we found a match
|
||||
// else intersected: keep scanning, to next doc if needed
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
if (includeStart == NO_MORE_POSITIONS) {
|
||||
moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS;
|
||||
if (moreInclude) {
|
||||
atFirstInCurrentDoc = true;
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return next(); // scan to next match
|
||||
private boolean excludeForwardInCurrentDocAndAtMatch() throws IOException {
|
||||
assert moreInclude;
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
if (! moreExclude) {
|
||||
return true;
|
||||
}
|
||||
if (includeSpans.docID() != excludeSpans.docID()) {
|
||||
return true;
|
||||
}
|
||||
// at same doc
|
||||
if (excludeStart == -1) { // init exclude start position if needed
|
||||
excludeStart = excludeSpans.nextStartPosition();
|
||||
assert excludeStart != NO_MORE_POSITIONS;
|
||||
}
|
||||
while (excludeSpans.endPosition() <= includeStart - pre) {
|
||||
// exclude end position is before a possible exclusion
|
||||
excludeStart = excludeSpans.nextStartPosition();
|
||||
if (excludeStart == NO_MORE_POSITIONS) {
|
||||
return true; // no more exclude at current doc.
|
||||
}
|
||||
}
|
||||
// exclude end position far enough in current doc, check start position:
|
||||
boolean res = includeSpans.endPosition() + post <= excludeStart;
|
||||
return res;
|
||||
}
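A hedged worked example of the window check above, with illustrative positions, pre=1 and post=2, for an include span [5,7):

    // exclude [3,4):  endPosition 4 <= 5 - 1            -> too early, the include span is kept
    // exclude [4,6):  ends after 4 and starts before 7 + 2 -> in the window, the include span is rejected
    // exclude [9,10): startPosition 9 >= 7 + 2          -> far enough after, the include span is kept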
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (moreInclude) {
|
||||
assert target > includeSpans.docID() : "target="+target+", includeSpans.docID()="+includeSpans.docID();
|
||||
moreInclude = includeSpans.advance(target) != NO_MORE_DOCS;
|
||||
if (moreInclude) {
|
||||
atFirstInCurrentDoc = true;
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
}
|
||||
}
|
||||
toNextIncluded();
|
||||
int res = moreInclude ? includeSpans.docID() : NO_MORE_DOCS;
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
int res = includeSpans.docID();
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
assert moreInclude;
|
||||
|
||||
if (atFirstInCurrentDoc) {
|
||||
atFirstInCurrentDoc = false;
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
return includeStart;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return includeSpans.doc(); }
|
||||
@Override
|
||||
public int start() { return includeSpans.start(); }
|
||||
@Override
|
||||
public int end() { return includeSpans.end(); }
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
while ((includeStart != NO_MORE_POSITIONS)
|
||||
&& (! excludeForwardInCurrentDocAndAtMatch()))
|
||||
{
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
}
|
||||
|
||||
return includeStart;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
assert includeStart == includeSpans.startPosition();
|
||||
return atFirstInCurrentDoc ? -1 : includeStart;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return atFirstInCurrentDoc ? -1 : includeSpans.endPosition();
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
ArrayList<byte[]> result = null;
|
||||
|
@ -180,7 +254,6 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
return result;
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return includeSpans.isPayloadAvailable();
|
||||
|
@ -193,10 +266,9 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "spans(" + SpanNotQuery.this.toString() + ")";
|
||||
}
|
||||
|
||||
};
|
||||
return "spans(" + SpanNotQuery.this.toString() + ")";
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -230,7 +302,7 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
SpanNotQuery other = (SpanNotQuery)o;
|
||||
return this.include.equals(other.include)
|
||||
&& this.exclude.equals(other.exclude)
|
||||
&& this.pre == other.pre
|
||||
&& this.post == other.post;
|
||||
}
|
||||
|
||||
|
|
|
@ -35,18 +35,19 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/** Matches the union of its clauses.*/
|
||||
/** Matches the union of its clauses.
|
||||
*/
|
||||
public class SpanOrQuery extends SpanQuery implements Cloneable {
|
||||
private List<SpanQuery> clauses;
|
||||
private String field;
|
||||
|
||||
/** Construct a SpanOrQuery merging the provided clauses. */
|
||||
/** Construct a SpanOrQuery merging the provided clauses.
|
||||
* All clauses must have the same field.
|
||||
*/
|
||||
public SpanOrQuery(SpanQuery... clauses) {
|
||||
|
||||
// copy clauses array into an ArrayList
|
||||
this.clauses = new ArrayList<>(clauses.length);
|
||||
for (int i = 0; i < clauses.length; i++) {
|
||||
addClause(clauses[i]);
|
||||
for (SpanQuery seq : clauses) {
|
||||
addClause(seq);
|
||||
}
|
||||
}
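A minimal usage sketch for SpanOrQuery; the field and terms are illustrative:

    // Union of span matches for "quick" or "fast" in the same field.
    SpanOrQuery or = new SpanOrQuery(
        new SpanTermQuery(new Term("body", "quick")),
        new SpanTermQuery(new Term("body", "fast")));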
|
||||
|
||||
|
@ -59,7 +60,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
this.clauses.add(clause);
|
||||
}
|
||||
|
||||
|
||||
/** Return the clauses whose spans are matched. */
|
||||
public SpanQuery[] getClauses() {
|
||||
return clauses.toArray(new SpanQuery[clauses.size()]);
|
||||
|
@ -74,7 +75,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
clause.extractTerms(terms);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SpanOrQuery clone() {
|
||||
int sz = clauses.size();
|
||||
|
@ -152,90 +153,120 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
protected final boolean lessThan(Spans spans1, Spans spans2) {
|
||||
if (spans1.doc() == spans2.doc()) {
|
||||
if (spans1.start() == spans2.start()) {
|
||||
return spans1.end() < spans2.end();
|
||||
if (spans1.docID() == spans2.docID()) {
|
||||
if (spans1.startPosition() == spans2.startPosition()) {
|
||||
return spans1.endPosition() < spans2.endPosition();
|
||||
} else {
|
||||
return spans1.start() < spans2.start();
|
||||
return spans1.startPosition() < spans2.startPosition();
|
||||
}
|
||||
} else {
|
||||
return spans1.doc() < spans2.doc();
|
||||
return spans1.docID() < spans2.docID();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||
if (clauses.size() == 1) // optimize 1-clause case
|
||||
return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
|
||||
throws IOException {
|
||||
|
||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||
|
||||
for (SpanQuery seq : clauses) {
|
||||
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
|
||||
if (subSpan != null) {
|
||||
subSpans.add(subSpan);
|
||||
}
|
||||
}
|
||||
|
||||
if (subSpans.size() == 0) {
|
||||
return null;
|
||||
} else if (subSpans.size() == 1) {
|
||||
return subSpans.get(0);
|
||||
}
|
||||
|
||||
SpanQueue queue = new SpanQueue(clauses.size());
|
||||
for (Spans spans : subSpans) {
|
||||
queue.add(spans);
|
||||
}
|
||||
|
||||
return new Spans() {
|
||||
private SpanQueue queue = null;
|
||||
private long cost;
|
||||
|
||||
private boolean initSpanQueue(int target) throws IOException {
|
||||
queue = new SpanQueue(clauses.size());
|
||||
Iterator<SpanQuery> i = clauses.iterator();
|
||||
while (i.hasNext()) {
|
||||
Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
|
||||
cost += spans.cost();
|
||||
if ( ((target == -1) && spans.next())
|
||||
|| ((target != -1) && spans.skipTo(target))) {
|
||||
queue.add(spans);
|
||||
}
|
||||
}
|
||||
return queue.size() != 0;
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (queue.size() == 0) { // all done
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (queue == null) {
|
||||
return initSpanQueue(-1);
|
||||
}
|
||||
int currentDoc = top().docID();
|
||||
|
||||
if (queue.size() == 0) { // all done
|
||||
return false;
|
||||
}
|
||||
if (currentDoc == -1) { // initially
|
||||
return advance(0);
|
||||
}
|
||||
|
||||
if (top().next()) { // move to next
|
||||
do {
|
||||
if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
|
||||
queue.updateTop();
|
||||
return true;
|
||||
}
|
||||
|
||||
queue.pop(); // exhausted a clause
|
||||
return queue.size() != 0;
|
||||
}
|
||||
|
||||
private Spans top() { return queue.top(); }
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (queue == null) {
|
||||
return initSpanQueue(target);
|
||||
}
|
||||
|
||||
boolean skipCalled = false;
|
||||
while (queue.size() != 0 && top().doc() < target) {
|
||||
if (top().skipTo(target)) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
queue.pop();
|
||||
} else {
|
||||
queue.pop(); // exhausted a clause
|
||||
if (queue.size() == 0) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
skipCalled = true;
|
||||
}
|
||||
|
||||
if (skipCalled) {
|
||||
return queue.size() != 0;
|
||||
// assert queue.size() > 0;
|
||||
int doc = top().docID();
|
||||
if (doc > currentDoc) {
|
||||
return doc;
|
||||
}
|
||||
} while (true);
|
||||
}
|
||||
|
||||
private Spans top() {
|
||||
return queue.top();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
|
||||
while ((queue.size() > 0) && (top().docID() < target)) {
|
||||
if (top().advance(target) != NO_MORE_DOCS) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
queue.pop();
|
||||
}
|
||||
return next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return top().doc(); }
|
||||
@Override
|
||||
public int start() { return top().start(); }
|
||||
@Override
|
||||
public int end() { return top().end(); }
|
||||
return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return (queue == null) ? -1
|
||||
: (queue.size() > 0) ? top().docID()
|
||||
: NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
top().nextStartPosition();
|
||||
queue.updateTop();
|
||||
int startPos = top().startPosition();
|
||||
while (startPos == -1) { // initially at this doc
|
||||
top().nextStartPosition();
|
||||
queue.updateTop();
|
||||
startPos = top().startPosition();
|
||||
}
|
||||
return startPos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return top().startPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return top().endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
|
@ -257,15 +288,23 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
public String toString() {
|
||||
return "spans("+SpanOrQuery.this+")@"+
|
||||
((queue == null)?"START"
|
||||
:(queue.size()>0?(doc()+":"+start()+"-"+end()):"END"));
|
||||
}
|
||||
:(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
|
||||
}
|
||||
|
||||
private long cost = -1;
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
if (cost == -1) {
|
||||
cost = 0;
|
||||
for (Spans spans : subSpans) {
|
||||
cost += spans.cost();
|
||||
}
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,15 +28,14 @@ import java.util.Iterator;
|
|||
* Only return those matches that have a specific payload at
|
||||
* the given position.
|
||||
* <p>
|
||||
* Do not use this with an SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}. Instead, use
|
||||
* {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
|
||||
* Do not use this with a SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}.
|
||||
* Instead, use {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
|
||||
* aren't ordered by {@link org.apache.lucene.search.spans.SpanNearQuery}.
|
||||
*/
|
||||
public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
|
||||
public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {
|
||||
protected final Collection<byte[]> payloadToMatch;
|
||||
|
||||
/**
|
||||
*
|
||||
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
|
||||
* @param payloadToMatch The {@link java.util.Collection} of payloads to match
|
||||
*/
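A hedged usage sketch for this constructor; the payload bytes, field and term are illustrative (assumes java.util.Collections and java.nio.charset.StandardCharsets):

    SpanQuery term = new SpanTermQuery(new Term("body", "lucene"));
    Collection<byte[]> payloads =
        Collections.singletonList("NN".getBytes(StandardCharsets.UTF_8));
    // Only keep "lucene" matches whose position carries exactly this payload.
    SpanPayloadCheckQuery check = new SpanPayloadCheckQuery(term, payloads);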
|
||||
|
@ -71,7 +70,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
|
|||
}
|
||||
}
|
||||
return AcceptStatus.YES;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
|
@ -108,7 +107,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = match.hashCode();
|
||||
int h = match.hashCode() ^ getClass().hashCode();
|
||||
h ^= (h << 8) | (h >>> 25); // reversible
|
||||
//TODO: is this right?
|
||||
h ^= payloadToMatch.hashCode();
|
||||
|
|
|
@ -25,10 +25,9 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -37,9 +36,8 @@ import java.util.Set;
|
|||
public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
|
||||
protected SpanQuery match;
|
||||
|
||||
|
||||
public SpanPositionCheckQuery(SpanQuery match) {
|
||||
this.match = match;
|
||||
this.match = Objects.requireNonNull(match);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -60,42 +58,44 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
match.extractTerms(terms);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return value for {@link SpanPositionCheckQuery#acceptPosition(Spans)}.
|
||||
*/
|
||||
protected static enum AcceptStatus {
|
||||
/** Indicates the match should be accepted */
|
||||
YES,
|
||||
|
||||
|
||||
/** Indicates the match should be rejected */
|
||||
NO,
|
||||
|
||||
/**
|
||||
* Indicates the match should be rejected, and the enumeration should advance
|
||||
* to the next document.
|
||||
|
||||
/**
|
||||
* Indicates the match should be rejected, and the enumeration may continue
|
||||
* with the next document.
|
||||
*/
|
||||
NO_AND_ADVANCE
|
||||
NO_MORE_IN_CURRENT_DOC
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Implementing classes are required to return whether the current position is a match for the passed in
|
||||
* "match" {@link org.apache.lucene.search.spans.SpanQuery}.
|
||||
* "match" {@link SpanQuery}.
|
||||
*
|
||||
* This is only called if the underlying {@link org.apache.lucene.search.spans.Spans#next()} for the
|
||||
* match is successful
|
||||
* This is only called if the underlying last {@link Spans#nextStartPosition()} for the
|
||||
* match indicated a valid start position.
|
||||
*
|
||||
*
|
||||
* @param spans The {@link org.apache.lucene.search.spans.Spans} instance, positioned at the spot to check
|
||||
* @param spans The {@link Spans} instance, positioned at the spot to check
|
||||
*
|
||||
* @return whether the match is accepted, rejected, or rejected and should move to the next doc.
|
||||
*
|
||||
* @see org.apache.lucene.search.spans.Spans#next()
|
||||
* @see Spans#nextDoc()
|
||||
*
|
||||
*/
|
||||
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
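A hedged sketch of a hypothetical acceptPosition implementation (not part of this commit), using the renamed position methods and AcceptStatus values introduced here:

    @Override
    protected AcceptStatus acceptPosition(Spans spans) throws IOException {
      if (spans.startPosition() >= 100) {
        // No later start position in this document can qualify either.
        return AcceptStatus.NO_MORE_IN_CURRENT_DOC;
      }
      // Accept only spans at most 3 positions wide.
      return (spans.endPosition() - spans.startPosition() <= 3)
          ? AcceptStatus.YES : AcceptStatus.NO;
    }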
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
return new PositionCheckSpan(context, acceptDocs, termContexts);
|
||||
Spans matchSpans = match.getSpans(context, acceptDocs, termContexts);
|
||||
return (matchSpans == null) ? null : new PositionCheckSpans(matchSpans);
|
||||
}
|
||||
|
||||
|
||||
|
@ -116,79 +116,110 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
}
|
||||
}
|
||||
|
||||
protected class PositionCheckSpan extends Spans {
|
||||
private Spans spans;
|
||||
protected class PositionCheckSpans extends FilterSpans {
|
||||
|
||||
public PositionCheckSpan(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
spans = match.getSpans(context, acceptDocs, termContexts);
|
||||
private boolean atFirstInCurrentDoc = false;
|
||||
private int startPos = -1;
|
||||
|
||||
public PositionCheckSpans(Spans matchSpans) throws IOException {
|
||||
super(matchSpans);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (!spans.next())
|
||||
return false;
|
||||
|
||||
return doNext();
|
||||
public int nextDoc() throws IOException {
|
||||
if (in.nextDoc() == NO_MORE_DOCS)
|
||||
return NO_MORE_DOCS;
|
||||
|
||||
return toNextDocWithAllowedPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (!spans.skipTo(target))
|
||||
return false;
|
||||
public int advance(int target) throws IOException {
|
||||
if (in.advance(target) == NO_MORE_DOCS)
|
||||
return NO_MORE_DOCS;
|
||||
|
||||
return doNext();
|
||||
return toNextDocWithAllowedPosition();
|
||||
}
|
||||
|
||||
protected boolean doNext() throws IOException {
|
||||
|
||||
@SuppressWarnings("fallthrough")
|
||||
protected int toNextDocWithAllowedPosition() throws IOException {
|
||||
startPos = in.nextStartPosition();
|
||||
assert startPos != NO_MORE_POSITIONS;
|
||||
for (;;) {
|
||||
switch(acceptPosition(this)) {
|
||||
case YES: return true;
|
||||
case NO:
|
||||
if (!spans.next())
|
||||
return false;
|
||||
break;
|
||||
case NO_AND_ADVANCE:
|
||||
if (!spans.skipTo(spans.doc()+1))
|
||||
return false;
|
||||
case YES:
|
||||
atFirstInCurrentDoc = true;
|
||||
return in.docID();
|
||||
case NO:
|
||||
startPos = in.nextStartPosition();
|
||||
if (startPos != NO_MORE_POSITIONS) {
|
||||
break;
|
||||
}
|
||||
// else fallthrough
|
||||
case NO_MORE_IN_CURRENT_DOC:
|
||||
if (in.nextDoc() == NO_MORE_DOCS) {
|
||||
startPos = -1;
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
startPos = in.nextStartPosition();
|
||||
assert startPos != NO_MORE_POSITIONS : "no start position at doc="+in.docID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return spans.doc(); }
|
||||
|
||||
@Override
|
||||
public int start() { return spans.start(); }
|
||||
|
||||
@Override
|
||||
public int end() { return spans.end(); }
|
||||
// TODO: Remove warning after API has been finalized
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
ArrayList<byte[]> result = null;
|
||||
if (spans.isPayloadAvailable()) {
|
||||
result = new ArrayList<>(spans.getPayload());
|
||||
public int nextStartPosition() throws IOException {
|
||||
if (atFirstInCurrentDoc) {
|
||||
atFirstInCurrentDoc = false;
|
||||
return startPos;
|
||||
}
|
||||
return result;//TODO: any way to avoid the new construction?
|
||||
}
|
||||
// TODO: Remove warning after API has been finalized
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return spans.isPayloadAvailable();
|
||||
for (;;) {
|
||||
startPos = in.nextStartPosition();
|
||||
if (startPos == NO_MORE_POSITIONS) {
|
||||
return NO_MORE_POSITIONS;
|
||||
}
|
||||
switch(acceptPosition(this)) {
|
||||
case YES:
|
||||
return startPos;
|
||||
case NO:
|
||||
break;
|
||||
case NO_MORE_IN_CURRENT_DOC:
|
||||
return startPos = NO_MORE_POSITIONS; // startPos ahead for the current doc.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return spans.cost();
|
||||
public int startPosition() {
|
||||
return atFirstInCurrentDoc ? -1 : startPos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return atFirstInCurrentDoc ? -1
|
||||
: (startPos != NO_MORE_POSITIONS) ? in.endPosition() : NO_MORE_POSITIONS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "spans(" + SpanPositionCheckQuery.this.toString() + ")";
|
||||
}
|
||||
return "spans(" + SpanPositionCheckQuery.this.toString() + ")";
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true iff <code>o</code> is equal to this. */
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null) return false;
|
||||
if (getClass() != o.getClass()) return false;
|
||||
final SpanPositionCheckQuery spcq = (SpanPositionCheckQuery) o;
|
||||
return match.equals(spcq.match);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return match.hashCode() ^ getClass().hashCode();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,10 +25,10 @@ import java.io.IOException;
|
|||
/**
|
||||
* Checks to see if the {@link #getMatch()} lies between a start and end position
|
||||
*
|
||||
* @see org.apache.lucene.search.spans.SpanFirstQuery for a derivation that is optimized for the case where start position is 0
|
||||
* See {@link SpanFirstQuery} for a derivation that is optimized for the case where start position is 0.
|
||||
*/
|
||||
public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
|
||||
protected int start = 0;
|
||||
protected int start;
|
||||
protected int end;
|
||||
|
||||
public SpanPositionRangeQuery(SpanQuery match, int start, int end) {
|
||||
|
@ -40,13 +40,12 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
|
|||
|
||||
@Override
|
||||
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
|
||||
assert spans.start() != spans.end();
|
||||
if (spans.start() >= end)
|
||||
return AcceptStatus.NO_AND_ADVANCE;
|
||||
else if (spans.start() >= start && spans.end() <= end)
|
||||
return AcceptStatus.YES;
|
||||
else
|
||||
return AcceptStatus.NO;
|
||||
assert spans.startPosition() != spans.endPosition();
|
||||
AcceptStatus res = (spans.startPosition() >= end)
|
||||
? AcceptStatus.NO_MORE_IN_CURRENT_DOC
|
||||
: (spans.startPosition() >= start && spans.endPosition() <= end)
|
||||
? AcceptStatus.YES : AcceptStatus.NO;
|
||||
return res;
|
||||
}
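A minimal usage sketch; the field, term and bounds are illustrative:

    SpanQuery term = new SpanTermQuery(new Term("body", "lucene"));
    // Accept spans starting at or after position 2 and ending at or before position 10.
    SpanPositionRangeQuery range = new SpanPositionRangeQuery(term, 2, 10);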
|
||||
|
||||
|
||||
|
@ -96,7 +95,7 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = match.hashCode();
|
||||
int h = match.hashCode() ^ getClass().hashCode();
|
||||
h ^= (h << 8) | (h >>> 25); // reversible
|
||||
h ^= Float.floatToRawIntBits(getBoost()) ^ end ^ start;
|
||||
return h;
|
||||
|
|
|
@ -25,16 +25,17 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/** Base class for span-based queries. */
|
||||
public abstract class SpanQuery extends Query {
|
||||
/** Expert: Returns the matches for this query in an index. Used internally
|
||||
* to search for spans. */
|
||||
/** Expert: Returns the matches for this query in an index.
|
||||
* Used internally to search for spans.
|
||||
* This may return null to indicate that the SpanQuery has no results.
|
||||
*/
|
||||
public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns the name of the field matched by this query.
|
||||
* <p>
|
||||
* Note that this may return null if the query matches no terms.
|
||||
|
@ -42,7 +43,7 @@ public abstract class SpanQuery extends Query {
|
|||
public abstract String getField();
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new SpanWeight(this, searcher);
|
||||
}
|
||||
|
||||
|
|
|
@ -18,9 +18,9 @@ package org.apache.lucene.search.spans;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
||||
/**
|
||||
|
@ -29,58 +29,68 @@ import org.apache.lucene.search.similarities.Similarity;
|
|||
public class SpanScorer extends Scorer {
|
||||
protected Spans spans;
|
||||
|
||||
protected boolean more = true;
|
||||
|
||||
protected int doc;
|
||||
protected float freq;
|
||||
protected int numMatches;
|
||||
protected final Similarity.SimScorer docScorer;
|
||||
|
||||
protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
|
||||
|
||||
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
|
||||
throws IOException {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
this.spans = spans;
|
||||
|
||||
doc = -1;
|
||||
more = spans.next();
|
||||
this.docScorer = Objects.requireNonNull(docScorer);
|
||||
this.spans = Objects.requireNonNull(spans);
|
||||
this.doc = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (!setFreqCurrentDoc()) {
|
||||
doc = NO_MORE_DOCS;
|
||||
int prevDoc = doc;
|
||||
doc = spans.nextDoc();
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
setFreqCurrentDoc();
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (!more) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
if (spans.doc() < target) { // setFreqCurrentDoc() leaves spans.doc() ahead
|
||||
more = spans.skipTo(target);
|
||||
}
|
||||
if (!setFreqCurrentDoc()) {
|
||||
doc = NO_MORE_DOCS;
|
||||
int prevDoc = doc;
|
||||
doc = spans.advance(target);
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
setFreqCurrentDoc();
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
protected boolean setFreqCurrentDoc() throws IOException {
|
||||
if (!more) {
|
||||
return false;
|
||||
}
|
||||
doc = spans.doc();
|
||||
freq = 0.0f;
|
||||
numMatches = 0;
|
||||
|
||||
assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans;
|
||||
assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans;
|
||||
int prevStartPos = -1;
|
||||
int prevEndPos = -1;
|
||||
|
||||
int startPos = spans.nextStartPosition();
|
||||
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
|
||||
do {
|
||||
int matchLength = spans.end() - spans.start();
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
assert startPos >= prevStartPos;
|
||||
int endPos = spans.endPosition();
|
||||
assert endPos != Spans.NO_MORE_POSITIONS;
|
||||
// This assertion can fail for Or spans on the same term:
|
||||
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos;
|
||||
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos;
|
||||
numMatches++;
|
||||
more = spans.next();
|
||||
} while (more && (doc == spans.doc()));
|
||||
int matchLength = endPos - startPos;
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
prevStartPos = startPos;
|
||||
prevEndPos = endPos;
|
||||
startPos = spans.nextStartPosition();
|
||||
} while (startPos != Spans.NO_MORE_POSITIONS);
|
||||
|
||||
assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
|
||||
assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
|
||||
|
||||
return true;
|
||||
}
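A hedged worked example of the freq accumulation above; the positions are illustrative and the 1/(distance+1) slop factor assumes DefaultSimilarity:

    // Two matches in one document, at [2,3) and [7,10):
    //   matchLength = 3 - 2 = 1  -> computeSlopFactor(1) = 1 / (1 + 1) = 0.5
    //   matchLength = 10 - 7 = 3 -> computeSlopFactor(3) = 1 / (3 + 1) = 0.25
    // freq for this document = 0.75, numMatches = 2.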
|
||||
|
||||
|
@ -89,15 +99,16 @@ public class SpanScorer extends Scorer {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return docScorer.score(doc, freq);
|
||||
float s = docScorer.score(doc, freq);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return numMatches;
|
||||
}
|
||||
|
||||
/** Returns the intermediate "sloppy freq" adjusted for edit distance
|
||||
* @lucene.internal */
|
||||
// only public so .payloads can see it.
|
||||
public float sloppyFreq() throws IOException {
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -31,19 +32,23 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/** Matches spans containing a term. */
|
||||
/** Matches spans containing a term.
|
||||
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
|
||||
*/
|
||||
public class SpanTermQuery extends SpanQuery {
|
||||
protected Term term;
|
||||
|
||||
/** Construct a SpanTermQuery matching the named term's spans. */
|
||||
public SpanTermQuery(Term term) { this.term = term; }
|
||||
public SpanTermQuery(Term term) {
|
||||
this.term = Objects.requireNonNull(term);
|
||||
}
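A minimal usage sketch; the field, term and searcher are illustrative:

    SpanTermQuery spanTerm = new SpanTermQuery(new Term("body", "lucene"));
    // searcher is an org.apache.lucene.search.IndexSearcher over the index.
    TopDocs hits = searcher.search(spanTerm, 10);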
|
||||
|
||||
/** Return the term whose spans are matched. */
|
||||
public Term getTerm() { return term; }
|
||||
|
||||
@Override
|
||||
public String getField() { return term.field(); }
|
||||
|
||||
|
||||
@Override
|
||||
public void extractTerms(Set<Term> terms) {
|
||||
terms.add(term);
|
||||
|
@ -64,7 +69,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = super.hashCode();
|
||||
result = prime * result + ((term == null) ? 0 : term.hashCode());
|
||||
result = prime * result + term.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -77,12 +82,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
SpanTermQuery other = (SpanTermQuery) obj;
|
||||
if (term == null) {
|
||||
if (other.term != null)
|
||||
return false;
|
||||
} else if (!term.equals(other.term))
|
||||
return false;
|
||||
return true;
|
||||
return term.equals(other.term);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -95,7 +95,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
final Terms terms = context.reader().terms(term.field());
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
if (termsEnum.seekExact(term.bytes())) {
|
||||
state = termsEnum.termState();
|
||||
} else {
|
||||
state = null;
|
||||
|
@ -106,14 +106,14 @@ public class SpanTermQuery extends SpanQuery {
|
|||
} else {
|
||||
state = termContext.get(context.ord);
|
||||
}
|
||||
|
||||
|
||||
if (state == null) { // term is not present in that reader
|
||||
return TermSpans.EMPTY_TERM_SPANS;
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
|
||||
termsEnum.seekExact(term.bytes(), state);
|
||||
|
||||
|
||||
final PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.PAYLOADS);
|
||||
|
||||
if (postings != null) {
|
||||
|
|
|
@ -51,7 +51,7 @@ public class SpanWeight extends Weight {
|
|||
super(query);
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.query = query;
|
||||
|
||||
|
||||
termContexts = new HashMap<>();
|
||||
TreeSet<Term> terms = new TreeSet<>();
|
||||
query.extractTerms(terms);
|
||||
|
@ -66,8 +66,8 @@ public class SpanWeight extends Weight {
|
|||
}
|
||||
final String field = query.getField();
|
||||
if (field != null) {
|
||||
stats = similarity.computeWeight(query.getBoost(),
|
||||
searcher.collectionStatistics(query.getField()),
|
||||
termStats);
|
||||
}
|
||||
}
|
||||
|
@ -88,9 +88,9 @@ public class SpanWeight extends Weight {
|
|||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
if (stats == null) {
|
||||
return null;
|
||||
} else {
|
||||
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context));
|
||||
}
|
||||
Spans spans = query.getSpans(context, acceptDocs, termContexts);
|
||||
return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -106,11 +106,11 @@ public class SpanWeight extends Weight {
|
|||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||
result.addDetail(scoreExplanation);
|
||||
result.setValue(scoreExplanation.getValue());
|
||||
result.setMatch(true);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return new ComplexExplanation(false, 0.0f, "no matching term");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,54 +20,44 @@ package org.apache.lucene.search.spans;
|
|||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
/** Expert: an enumeration of span matches. Used to implement span searching.
|
||||
* Each span represents a range of term positions within a document. Matches
|
||||
* are enumerated in order, by increasing document number, within that by
|
||||
* increasing start position and finally by increasing end position. */
|
||||
public abstract class Spans {
|
||||
/** Move to the next match, returning true iff any such exists. */
|
||||
public abstract boolean next() throws IOException;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
|
||||
/** Skips to the first match beyond the current, whose document number is
|
||||
* greater than or equal to <i>target</i>.
|
||||
* <p>The behavior of this method is <b>undefined</b> when called with
|
||||
* <code> target ≤ current</code>, or after the iterator has exhausted.
|
||||
* Both cases may result in unpredicted behavior.
|
||||
* <p>Returns true iff there is such
|
||||
* a match. <p>Behaves as if written:
|
||||
* <pre class="prettyprint">
|
||||
* boolean skipTo(int target) {
|
||||
* do {
|
||||
* if (!next())
|
||||
* return false;
|
||||
* } while (target > doc());
|
||||
* return true;
|
||||
* }
|
||||
* </pre>
|
||||
* Most implementations are considerably more efficient than that.
|
||||
*/
|
||||
public abstract boolean skipTo(int target) throws IOException;
|
||||
/** Iterates through combinations of start/end positions per-doc.
|
||||
* Each start/end position represents a range of term positions within the current document.
|
||||
* These are enumerated in order, by increasing document number, within that by
|
||||
* increasing start position and finally by increasing end position.
|
||||
*/
|
||||
public abstract class Spans extends DocIdSetIterator {
|
||||
public static final int NO_MORE_POSITIONS = Integer.MAX_VALUE;
|
||||
|
||||
/** Returns the document number of the current match. Initially invalid. */
|
||||
public abstract int doc();
|
||||
|
||||
/** Returns the start position of the current match. Initially invalid. */
|
||||
public abstract int start();
|
||||
|
||||
/** Returns the end position of the current match. Initially invalid. */
|
||||
public abstract int end();
|
||||
|
||||
/**
|
||||
* Returns the payload data for the current span.
|
||||
* This is invalid until {@link #next()} is called for
|
||||
* the first time.
|
||||
* Returns the next start position for the current doc.
|
||||
* There is always at least one start/end position per doc.
|
||||
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
|
||||
*/
|
||||
public abstract int nextStartPosition() throws IOException;
|
||||
|
||||
/**
|
||||
* Returns the start position in the current doc, or -1 when {@link #nextStartPosition} was not yet called on the current doc.
|
||||
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
|
||||
*/
|
||||
public abstract int startPosition();
|
||||
|
||||
/**
|
||||
* Returns the end position for the current start position, or -1 when {@link #nextStartPosition} was not yet called on the current doc.
|
||||
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
|
||||
*/
|
||||
public abstract int endPosition();
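A hedged sketch of the iteration protocol defined by these methods; spans stands for any Spans instance and the consuming loop is illustrative:

    int doc;
    while ((doc = spans.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
        int start = spans.startPosition();
        int end = spans.endPosition();
        // consume the match [start, end) in document doc
      }
    }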
|
||||
|
||||
/**
|
||||
* Returns the payload data for the current start/end position.
|
||||
* This is only valid after {@link #nextStartPosition()}
|
||||
* returned an available start position.
|
||||
* This method must not be called more than once after each call
|
||||
* of {@link #next()}. However, most payloads are loaded lazily,
|
||||
* of {@link #nextStartPosition()}. However, most payloads are loaded lazily,
|
||||
* so if the payload data for the current position is not needed,
|
||||
* this method may not be called at all for performance reasons. An ordered
|
||||
* SpanQuery does not lazy load, so if you have payloads in your index and
|
||||
* you do not want ordered SpanNearQuerys to collect payloads, you can
|
||||
* disable collection with a constructor option.<br>
|
||||
* this method may not be called at all for performance reasons.
|
||||
* <br>
|
||||
* Note that the return type is a collection, thus the ordering should not be relied upon.
|
||||
* <br>
|
||||
|
@ -76,25 +66,35 @@ public abstract class Spans {
|
|||
* @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
|
||||
* @throws IOException if there is a low-level I/O error
|
||||
*/
|
||||
// TODO: Remove warning after API has been finalized
|
||||
public abstract Collection<byte[]> getPayload() throws IOException;
|
||||
|
||||
/**
|
||||
* Checks if a payload can be loaded at this position.
|
||||
* Checks if a payload can be loaded at the current start/end position.
|
||||
* <p>
|
||||
* Payloads can only be loaded once per call to
|
||||
* {@link #next()}.
|
||||
* {@link #nextStartPosition()}.
|
||||
*
|
||||
* @return true if there is a payload available at this position that can be loaded
|
||||
* @return true if there is a payload available at this start/end position
|
||||
* that can be loaded
|
||||
*/
|
||||
public abstract boolean isPayloadAvailable() throws IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the estimated cost of this spans.
|
||||
* <p>
|
||||
* This is generally an upper bound of the number of documents this iterator
|
||||
* might match, but may be a rough heuristic, hardcoded value, or otherwise
|
||||
* completely inaccurate.
|
||||
* Optional method: Return a {@link TwoPhaseIterator} view of this
|
||||
* {@link Spans}. A return value of {@code null} indicates that
|
||||
* two-phase iteration is not supported.
|
||||
*
|
||||
* Note that the returned {@link TwoPhaseIterator}'s
|
||||
* {@link TwoPhaseIterator#approximation() approximation} must
|
||||
* advance synchronously with this iterator: advancing the approximation must
|
||||
* advance this iterator and vice-versa.
|
||||
*
|
||||
* Implementing this method is typically useful on {@link Spans}s
|
||||
* that have a high per-document overhead in order to confirm matches.
|
||||
*
|
||||
* The default implementation returns {@code null}.
|
||||
*/
|
||||
public abstract long cost();
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
return null;
|
||||
}
|
||||
}
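A hedged sketch of how a caller such as the conjunction code can consume this optional view; the loop is illustrative:

    TwoPhaseIterator twoPhase = spans.asTwoPhaseIterator();
    if (twoPhase != null) {
      DocIdSetIterator approximation = twoPhase.approximation();
      int doc;
      while ((doc = approximation.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (twoPhase.matches()) {
          // doc is a confirmed match; positions may now be read from the Spans
        }
      }
    } else {
      // No two-phase support: iterate the Spans directly as above.
    }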
|
||||
|
|
|
@ -24,10 +24,12 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collections;
import java.util.Collection;
import java.util.Objects;

/**
 * Expert:
 * Public for extension only
 * Public for extension only.
 * This does not work correctly for terms that are indexed at position Integer.MAX_VALUE.
 */
public class TermSpans extends Spans {
  protected final PostingsEnum postings;
@ -39,65 +41,67 @@ public class TermSpans extends Spans {
|
|||
protected boolean readPayload;
|
||||
|
||||
public TermSpans(PostingsEnum postings, Term term) {
|
||||
this.postings = postings;
|
||||
this.term = term;
|
||||
doc = -1;
|
||||
}
|
||||
|
||||
// only for EmptyTermSpans (below)
|
||||
TermSpans() {
|
||||
term = null;
|
||||
postings = null;
|
||||
this.postings = Objects.requireNonNull(postings);
|
||||
this.term = Objects.requireNonNull(term);
|
||||
this.doc = -1;
|
||||
this.position = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (count == freq) {
|
||||
if (postings == null) {
|
||||
return false;
|
||||
}
|
||||
doc = postings.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return false;
|
||||
}
|
||||
public int nextDoc() throws IOException {
|
||||
doc = postings.nextDoc();
|
||||
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
freq = postings.freq();
|
||||
assert freq >= 1;
|
||||
count = 0;
|
||||
}
|
||||
position = postings.nextPosition();
|
||||
count++;
|
||||
readPayload = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
assert target > doc;
|
||||
doc = postings.advance(target);
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
freq = postings.freq();
|
||||
count = 0;
|
||||
position = postings.nextPosition();
|
||||
count++;
|
||||
readPayload = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
position = -1;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
public int advance(int target) throws IOException {
|
||||
assert target > doc;
|
||||
doc = postings.advance(target);
|
||||
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
freq = postings.freq();
|
||||
assert freq >= 1;
|
||||
count = 0;
|
||||
}
|
||||
position = -1;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
if (count == freq) {
|
||||
assert position != NO_MORE_POSITIONS;
|
||||
return position = NO_MORE_POSITIONS;
|
||||
}
|
||||
int prevPosition = position;
|
||||
position = postings.nextPosition();
|
||||
assert position >= prevPosition : "prevPosition="+prevPosition+" > position="+position;
|
||||
assert position != NO_MORE_POSITIONS; // int endPosition not possible
|
||||
count++;
|
||||
readPayload = false;
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return position + 1;
|
||||
public int startPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return (position == -1) ? -1
|
||||
: (position != NO_MORE_POSITIONS) ? position + 1
|
||||
: NO_MORE_POSITIONS;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -105,7 +109,6 @@ public class TermSpans extends Spans {
|
|||
return postings.cost();
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
final BytesRef payload = postings.getPayload();
|
||||
|
@ -120,7 +123,6 @@ public class TermSpans extends Spans {
|
|||
return Collections.singletonList(bytes);
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return readPayload == false && postings.getPayload() != null;
|
||||
|
@ -129,55 +131,12 @@ public class TermSpans extends Spans {
|
|||
@Override
|
||||
public String toString() {
|
||||
return "spans(" + term.toString() + ")@" +
|
||||
(doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position);
|
||||
(doc == -1 ? "START" : (doc == NO_MORE_DOCS) ? "ENDDOC"
|
||||
: doc + " - " + (position == NO_MORE_POSITIONS ? "ENDPOS" : position));
|
||||
}
|
||||
|
||||
public PostingsEnum getPostings() {
|
||||
return postings;
|
||||
}
|
||||
|
||||
private static final class EmptyTermSpans extends TermSpans {
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
public static final TermSpans EMPTY_TERM_SPANS = new EmptyTermSpans();
|
||||
}
|
||||
|
|
|
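For contrast with the removed next()/skipTo() protocol, here is a hedged sketch (not part of the patch) of the two-level loop that the reworked TermSpans, like any Spans, now supports: an outer DISI-style loop over documents and an inner loop over start positions.

import java.io.IOException;
import org.apache.lucene.search.spans.Spans;

// Sketch: count all matches of a Spans using the new iteration protocol.
static int countMatches(Spans spans) throws IOException {
  int matches = 0;
  while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
    // startPosition()/endPosition() are -1 until nextStartPosition() is called on this doc.
    while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
      matches++; // spans.startPosition() and spans.endPosition() are valid here
    }
  }
  return matches;
}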
@ -18,14 +18,18 @@
/**
 * The calculus of spans.
 *
 * <p>A span is a <code><doc,startPosition,endPosition></code> tuple.</p>
 * <p>A span is a <code><doc,startPosition,endPosition></code> tuple that is enumerated by
 * class {@link org.apache.lucene.search.spans.Spans Spans}.
 * </p>
 *
 * <p>The following span query operators are implemented:
 *
 * <ul>
 *
 * <li>A {@link org.apache.lucene.search.spans.SpanTermQuery SpanTermQuery} matches all spans
 * containing a particular {@link org.apache.lucene.index.Term Term}.</li>
 * containing a particular {@link org.apache.lucene.index.Term Term}.
 * This should not be used for terms that are indexed at position Integer.MAX_VALUE.
 * </li>
 *
 * <li> A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} matches spans
 * which occur near one another, and can be used to implement things like
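The operators described in this package documentation compose into ordinary queries; a brief sketch follows, with an assumed "body" field and example terms that are not part of this patch.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

// Sketch: "apache" within 3 positions of "lucene", in order, on an assumed "body" field.
SpanQuery t1 = new SpanTermQuery(new Term("body", "apache"));
SpanQuery t2 = new SpanTermQuery(new Term("body", "lucene"));
SpanQuery near = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 3, true);
TopDocs hits = searcher.search(near, 10); // searcher is an assumed IndexSearcher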
@ -238,18 +238,20 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println("\ngetPayloadSpans test");
|
||||
}
|
||||
Spans pspans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
while (pspans.next()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("doc " + pspans.doc() + ": span " + pspans.start()
|
||||
+ " to " + pspans.end());
|
||||
}
|
||||
Collection<byte[]> payloads = pspans.getPayload();
|
||||
sawZero |= pspans.start() == 0;
|
||||
for (byte[] bytes : payloads) {
|
||||
count++;
|
||||
Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
while (pspans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
|
||||
System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition()
|
||||
+ " to " + pspans.endPosition());
|
||||
}
|
||||
Collection<byte[]> payloads = pspans.getPayload();
|
||||
sawZero |= pspans.startPosition() == 0;
|
||||
for (byte[] bytes : payloads) {
|
||||
count++;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -257,20 +259,20 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
assertEquals(5, count);
|
||||
|
||||
// System.out.println("\ngetSpans test");
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
count = 0;
|
||||
sawZero = false;
|
||||
while (spans.next()) {
|
||||
count++;
|
||||
sawZero |= spans.start() == 0;
|
||||
// System.out.println(spans.doc() + " - " + spans.start() + " - " +
|
||||
// spans.end());
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
count++;
|
||||
sawZero |= spans.startPosition() == 0;
|
||||
// System.out.println(spans.doc() + " - " + spans.start() + " - " +
|
||||
// spans.end());
|
||||
}
|
||||
}
|
||||
assertEquals(4, count);
|
||||
assertTrue(sawZero);
|
||||
|
||||
// System.out.println("\nPayloadSpanUtil test");
|
||||
|
||||
sawZero = false;
|
||||
PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext());
|
||||
Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
|
||||
|
|
|
@ -160,7 +160,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
|
||||
}
|
||||
CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
/*float score = hits.score(0);
|
||||
for (int i =1; i < hits.length(); i++)
|
||||
|
@ -211,13 +211,15 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
}
|
||||
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
|
||||
CheckHits.checkExplanations(query, "field", searcher, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
//should be two matches per document
|
||||
int count = 0;
|
||||
//100 hits times 2 matches per hit, we should have 200 in count
|
||||
while (spans.next()) {
|
||||
count++;
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
assertTrue(count + " does not equal: " + 200, count == 200);
|
||||
}
|
||||
|
@ -253,13 +255,15 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
}
|
||||
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
|
||||
CheckHits.checkExplanations(query, "field", searcher, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
//should be two matches per document
|
||||
int count = 0;
|
||||
//100 hits times 2 matches per hit, we should have 200 in count
|
||||
while (spans.next()) {
|
||||
count++;
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.Map;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
|
@ -42,27 +41,32 @@ final class JustCompileSearchSpans {
|
|||
static final class JustCompileSpans extends Spans {
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
public int docID() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
public int nextDoc() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
public int advance(int target) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) {
|
||||
public int endPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
public int nextStartPosition() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
@ -103,6 +107,36 @@ final class JustCompileSearchSpans {
|
|||
|
||||
static final class JustCompilePayloadSpans extends Spans {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
|
@ -113,31 +147,6 @@ final class JustCompileSearchSpans {
|
|||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
|
@ -147,7 +156,7 @@ final class JustCompileSearchSpans {
|
|||
|
||||
static final class JustCompileSpanScorer extends SpanScorer {
|
||||
|
||||
protected JustCompileSpanScorer(Spans spans, Weight weight,
|
||||
protected JustCompileSpanScorer(Spans spans, SpanWeight weight,
|
||||
Similarity.SimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
}
|
||||
|
|
|
@ -18,19 +18,18 @@ package org.apache.lucene.search.spans;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -39,141 +38,20 @@ import org.apache.lucene.search.DocIdSetIterator;
 * NOTE: This should be used for testing purposes only
 * @lucene.internal
 */
public class MultiSpansWrapper extends Spans { // can't be package private due to payloads
public class MultiSpansWrapper {

  private SpanQuery query;
  private List<LeafReaderContext> leaves;
  private int leafOrd = 0;
  private Spans current;
  private Map<Term,TermContext> termContexts;
  private final int numLeaves;

  private MultiSpansWrapper(List<LeafReaderContext> leaves, SpanQuery query, Map<Term,TermContext> termContexts) {
    this.query = query;
    this.leaves = leaves;
    this.numLeaves = leaves.size();
    this.termContexts = termContexts;
  }

  public static Spans wrap(IndexReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
  public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException {
    LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing
    LeafReaderContext lrContext = lr.getContext();
    Query rewrittenQuery = spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly
    HashSet<Term> termSet = new HashSet<>();
    rewrittenQuery.extractTerms(termSet);
    Map<Term,TermContext> termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();
    query.extractTerms(terms);
    for (Term term : terms) {
      termContexts.put(term, TermContext.build(topLevelReaderContext, term));
    for (Term term: termSet) {
      TermContext termContext = TermContext.build(lrContext, term);
      termContexts.put(term, termContext);
    }
    final List<LeafReaderContext> leaves = topLevelReaderContext.leaves();
    if(leaves.size() == 1) {
      final LeafReaderContext ctx = leaves.get(0);
      return query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
    }
    return new MultiSpansWrapper(leaves, query, termContexts);
    Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts);
    return actSpans;
  }
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (leafOrd >= numLeaves) {
|
||||
return false;
|
||||
}
|
||||
if (current == null) {
|
||||
final LeafReaderContext ctx = leaves.get(leafOrd);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
}
|
||||
while(true) {
|
||||
if (current.next()) {
|
||||
return true;
|
||||
}
|
||||
if (++leafOrd < numLeaves) {
|
||||
final LeafReaderContext ctx = leaves.get(leafOrd);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
} else {
|
||||
current = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (leafOrd >= numLeaves) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int subIndex = ReaderUtil.subIndex(target, leaves);
|
||||
assert subIndex >= leafOrd;
|
||||
if (subIndex != leafOrd) {
|
||||
final LeafReaderContext ctx = leaves.get(subIndex);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
leafOrd = subIndex;
|
||||
} else if (current == null) {
|
||||
final LeafReaderContext ctx = leaves.get(leafOrd);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
}
|
||||
while (true) {
|
||||
if (target < leaves.get(leafOrd).docBase) {
|
||||
// target was in the previous slice
|
||||
if (current.next()) {
|
||||
return true;
|
||||
}
|
||||
} else if (current.skipTo(target - leaves.get(leafOrd).docBase)) {
|
||||
return true;
|
||||
}
|
||||
if (++leafOrd < numLeaves) {
|
||||
final LeafReaderContext ctx = leaves.get(leafOrd);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
} else {
|
||||
current = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
if (current == null) {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
return current.doc() + leaves.get(leafOrd).docBase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
if (current == null) {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
return current.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
if (current == null) {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
return current.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
if (current == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return current.getPayload();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
if (current == null) {
|
||||
return false;
|
||||
}
|
||||
return current.isPayloadAvailable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return Integer.MAX_VALUE; // just for tests
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
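With this rewrite MultiSpansWrapper is no longer a Spans itself but a small factory. A hedged sketch of how a test might obtain and drain the returned Spans follows; the field and term are illustrative, and wrap() may return null when the query produces no spans (as the updated TestPayloadSpans relies on).

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;

// Sketch: wrap() now takes the IndexReader directly instead of an IndexReaderContext.
static void dumpSpans(IndexReader reader) throws IOException {
  Spans spans = MultiSpansWrapper.wrap(reader, new SpanTermQuery(new Term("field", "seventy")));
  if (spans == null) {
    return; // no matching spans for this query
  }
  while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
    while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
      System.out.println(spans.docID() + ": " + spans.startPosition() + "-" + spans.endPosition());
    }
  }
}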
@ -651,47 +651,6 @@ public class TestBasics extends LuceneTestCase {
|
|||
1746, 1747, 1756, 1757, 1766, 1767, 1776, 1777, 1786, 1787, 1796, 1797});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansSkipTo() throws Exception {
|
||||
SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy"));
|
||||
SpanTermQuery t2 = new SpanTermQuery(new Term("field", "seventy"));
|
||||
Spans s1 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t1);
|
||||
Spans s2 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t2);
|
||||
|
||||
assertTrue(s1.next());
|
||||
assertTrue(s2.next());
|
||||
|
||||
boolean hasMore = true;
|
||||
|
||||
do {
|
||||
hasMore = skipToAccordingToJavaDocs(s1, s1.doc() + 1);
|
||||
assertEquals(hasMore, s2.skipTo(s2.doc() + 1));
|
||||
assertEquals(s1.doc(), s2.doc());
|
||||
} while (hasMore);
|
||||
}
|
||||
|
||||
/** Skips to the first match beyond the current, whose document number is
|
||||
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
|
||||
* a match. <p>Behaves as if written: <pre>
|
||||
* boolean skipTo(int target) {
|
||||
* do {
|
||||
* if (!next())
|
||||
* return false;
|
||||
* } while (target > doc());
|
||||
* return true;
|
||||
* }
|
||||
* </pre>
|
||||
*/
|
||||
private boolean skipToAccordingToJavaDocs(Spans s, int target)
|
||||
throws Exception {
|
||||
do {
|
||||
if (!s.next())
|
||||
return false;
|
||||
} while (target > s.doc());
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
private void checkHits(Query query, int[] results) throws IOException {
|
||||
CheckHits.checkHits(random(), query, "field", searcher, results);
|
||||
}
|
||||
|
|
|
@ -258,37 +258,19 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
|||
SpanQuery q2 = new SpanTermQuery(new Term("first", "james"));
|
||||
SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender"));
|
||||
check(q, new int[] { 0, 1, 2, 3, 4 });
|
||||
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(0,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(1,0,1), s(span));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(1,1,2), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,1,2), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,2,3), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(3,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(4,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(4,1,2), s(span));
|
||||
|
||||
assertEquals(false, span.next());
|
||||
TestSpans.tstNextSpans(span, 0,0,1);
|
||||
TestSpans.tstNextSpans(span, 1,0,1);
|
||||
TestSpans.tstNextSpans(span, 1,1,2);
|
||||
TestSpans.tstNextSpans(span, 2,0,1);
|
||||
TestSpans.tstNextSpans(span, 2,1,2);
|
||||
TestSpans.tstNextSpans(span, 2,2,3);
|
||||
TestSpans.tstNextSpans(span, 3,0,1);
|
||||
TestSpans.tstNextSpans(span, 4,0,1);
|
||||
TestSpans.tstNextSpans(span, 4,1,2);
|
||||
TestSpans.tstEndSpans(span);
|
||||
}
|
||||
|
||||
public void testSpans1() throws Exception {
|
||||
|
@ -300,19 +282,22 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
|||
check(qA, new int[] { 0, 1, 2, 4 });
|
||||
check(qB, new int[] { 0, 1, 2, 4 });
|
||||
|
||||
Spans spanA = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qA);
|
||||
Spans spanB = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qB);
|
||||
Spans spanA = MultiSpansWrapper.wrap(searcher.getIndexReader(), qA);
|
||||
Spans spanB = MultiSpansWrapper.wrap(searcher.getIndexReader(), qB);
|
||||
|
||||
while (spanA.next()) {
|
||||
assertTrue("spanB not still going", spanB.next());
|
||||
assertEquals("spanA not equal spanB", s(spanA), s(spanB));
|
||||
while (spanA.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
assertNotSame("spanB not still going", Spans.NO_MORE_DOCS, spanB.nextDoc());
|
||||
while (spanA.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
assertEquals("spanB start position", spanA.startPosition(), spanB.nextStartPosition());
|
||||
assertEquals("spanB end position", spanA.endPosition(), spanB.endPosition());
|
||||
}
|
||||
assertEquals("spanB start position", Spans.NO_MORE_POSITIONS, spanB.nextStartPosition());
|
||||
}
|
||||
assertTrue("spanB still going even tough spanA is done", !(spanB.next()));
|
||||
|
||||
assertEquals("spanB end doc", Spans.NO_MORE_DOCS, spanB.nextDoc());
|
||||
}
|
||||
|
||||
public void testSpans2() throws Exception {
|
||||
assumeTrue("Broken scoring: LUCENE-3723",
|
||||
assumeTrue("Broken scoring: LUCENE-3723",
|
||||
searcher.getSimilarity() instanceof TFIDFSimilarity);
|
||||
SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female"));
|
||||
SpanQuery qA2 = new SpanTermQuery(new Term("first", "james"));
|
||||
|
@ -322,30 +307,17 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
|||
{ new FieldMaskingSpanQuery(qA, "id"),
|
||||
new FieldMaskingSpanQuery(qB, "id") }, -1, false );
|
||||
check(q, new int[] { 0, 1, 2, 3 });
|
||||
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(0,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(1,1,2), s(span));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,2,3), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(3,0,1), s(span));
|
||||
|
||||
assertEquals(false, span.next());
|
||||
TestSpans.tstNextSpans(span, 0,0,1);
|
||||
TestSpans.tstNextSpans(span, 1,1,2);
|
||||
TestSpans.tstNextSpans(span, 2,0,1);
|
||||
TestSpans.tstNextSpans(span, 2,2,3);
|
||||
TestSpans.tstNextSpans(span, 3,0,1);
|
||||
TestSpans.tstEndSpans(span);
|
||||
}
|
||||
|
||||
public String s(Spans span) {
|
||||
return s(span.doc(), span.start(), span.end());
|
||||
}
|
||||
public String s(int doc, int start, int end) {
|
||||
return "s(" + doc + "," + start + "," + end +")";
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ public class TestNearSpansOrdered extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public String s(Spans span) {
|
||||
return s(span.doc(), span.start(), span.end());
|
||||
return s(span.docID(), span.startPosition(), span.endPosition());
|
||||
}
|
||||
public String s(int doc, int start, int end) {
|
||||
return "s(" + doc + "," + start + "," + end +")";
|
||||
|
@ -114,12 +114,10 @@ public class TestNearSpansOrdered extends LuceneTestCase {
|
|||
|
||||
public void testNearSpansNext() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(1,0,4), s(span));
|
||||
assertEquals(false, span.next());
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
TestSpans.tstNextSpans(span,0,0,3);
|
||||
TestSpans.tstNextSpans(span,1,0,4);
|
||||
TestSpans.tstEndSpans(span);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -127,51 +125,58 @@ public class TestNearSpansOrdered extends LuceneTestCase {
|
|||
* same as next -- it's only applicable in this case since we know doc
|
||||
* does not contain more than one span
|
||||
*/
|
||||
public void testNearSpansSkipToLikeNext() throws Exception {
|
||||
public void testNearSpansAdvanceLikeNext() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.skipTo(0));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertEquals(0, span.advance(0));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
assertEquals(true, span.skipTo(1));
|
||||
assertEquals(1, span.advance(1));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(1,0,4), s(span));
|
||||
assertEquals(false, span.skipTo(2));
|
||||
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
|
||||
}
|
||||
|
||||
public void testNearSpansNextThenSkipTo() throws Exception {
|
||||
public void testNearSpansNextThenAdvance() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.next());
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc());
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
assertEquals(true, span.skipTo(1));
|
||||
assertNotSame(Spans.NO_MORE_DOCS, span.advance(1));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(1,0,4), s(span));
|
||||
assertEquals(false, span.next());
|
||||
assertEquals(Spans.NO_MORE_DOCS, span.nextDoc());
|
||||
}
|
||||
|
||||
public void testNearSpansNextThenSkipPast() throws Exception {
|
||||
public void testNearSpansNextThenAdvancePast() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.next());
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc());
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
assertEquals(false, span.skipTo(2));
|
||||
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
|
||||
}
|
||||
|
||||
public void testNearSpansSkipPast() throws Exception {
|
||||
public void testNearSpansAdvancePast() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(false, span.skipTo(2));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
|
||||
}
|
||||
|
||||
public void testNearSpansSkipTo0() throws Exception {
|
||||
public void testNearSpansAdvanceTo0() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.skipTo(0));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertEquals(0, span.advance(0));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
}
|
||||
|
||||
public void testNearSpansSkipTo1() throws Exception {
|
||||
public void testNearSpansAdvanceTo1() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.skipTo(1));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertEquals(1, span.advance(1));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(1,0,4), s(span));
|
||||
}
|
||||
|
||||
|
|
|
@ -67,12 +67,12 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq;
|
||||
Spans spans;
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
|
||||
spans = MultiSpansWrapper.wrap(indexReader.getContext(), stq);
|
||||
spans = MultiSpansWrapper.wrap(indexReader, stq);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 100, 1, 1, 1);
|
||||
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
|
||||
spans = MultiSpansWrapper.wrap(indexReader.getContext(), stq);
|
||||
spans = MultiSpansWrapper.wrap(indexReader, stq);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 100, 0, 0, 0);
|
||||
}
|
||||
|
@ -83,7 +83,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanFirstQuery sfq;
|
||||
match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
Spans spans = MultiSpansWrapper.wrap(indexReader.getContext(), sfq);
|
||||
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq);
|
||||
checkSpans(spans, 109, 1, 1, 1);
|
||||
//Test more complicated subclause
|
||||
SpanQuery[] clauses = new SpanQuery[2];
|
||||
|
@ -91,11 +91,11 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
|
||||
match = new SpanNearQuery(clauses, 0, true);
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader.getContext(), sfq), 100, 2, 1, 1);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
|
||||
|
||||
match = new SpanNearQuery(clauses, 0, false);
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader.getContext(), sfq), 100, 2, 1, 1);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
|
||||
|
||||
}
|
||||
|
||||
|
@ -119,7 +119,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
|
||||
checkSpans(MultiSpansWrapper.wrap(reader.getContext(), snq), 1,new int[]{2});
|
||||
checkSpans(MultiSpansWrapper.wrap(reader, snq), 1,new int[]{2});
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
@ -129,10 +129,8 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
Spans spans;
|
||||
IndexSearcher searcher = getSearcher();
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), stq);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 0, null);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq);
|
||||
assertNull(spans);
|
||||
|
||||
SpanQuery[] clauses = new SpanQuery[3];
|
||||
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
|
||||
|
@ -140,7 +138,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
|
||||
SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{3,3});
|
||||
|
||||
|
@ -151,7 +149,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
spanNearQuery = new SpanNearQuery(clauses, 6, true);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
|
||||
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 1, new int[]{3});
|
||||
|
@ -174,7 +172,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
// yy within 6 of xx within 6 of rr
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{3,3});
|
||||
closeIndexReader.close();
|
||||
|
@ -205,7 +203,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
clauses3[1] = snq;
|
||||
|
||||
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 1, new int[]{3});
|
||||
|
@ -243,7 +241,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{8, 8});
|
||||
closeIndexReader.close();
|
||||
|
@ -267,16 +265,18 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.next()) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -303,15 +303,18 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.next()) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
for (final byte[] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -338,16 +341,18 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.next()) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -395,31 +400,22 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
//each position match should have a span associated with it, since there is just one underlying term query, there should
|
||||
//only be one entry in the span
|
||||
int seen = 0;
|
||||
while (spans.next() == true)
|
||||
{
|
||||
//if we expect payloads, then isPayloadAvailable should be true
|
||||
if (expectedNumPayloads > 0) {
|
||||
assertTrue("isPayloadAvailable is not returning the correct value: " + spans.isPayloadAvailable()
|
||||
+ " and it should be: " + (expectedNumPayloads > 0),
|
||||
spans.isPayloadAvailable() == true);
|
||||
} else {
|
||||
assertTrue("isPayloadAvailable should be false", spans.isPayloadAvailable() == false);
|
||||
}
|
||||
//See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
assertTrue("payload Size: " + payload.size() + " is not: " + expectedNumPayloads, payload.size() == expectedNumPayloads);
|
||||
for (final byte [] thePayload : payload) {
|
||||
assertTrue("payload[0] Size: " + thePayload.length + " is not: " + expectedPayloadLength,
|
||||
thePayload.length == expectedPayloadLength);
|
||||
assertTrue(thePayload[0] + " does not equal: " + expectedFirstByte, thePayload[0] == expectedFirstByte);
|
||||
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
assertEquals("isPayloadAvailable should return true/false as payloads are expected", expectedNumPayloads > 0, spans.isPayloadAvailable());
|
||||
//See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
assertEquals("payload size", expectedNumPayloads, payload.size());
|
||||
for (final byte [] thePayload : payload) {
|
||||
assertEquals("payload length", expectedPayloadLength, thePayload.length);
|
||||
assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
|
||||
}
|
||||
}
|
||||
|
||||
seen++;
|
||||
}
|
||||
seen++;
|
||||
}
|
||||
assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans);
|
||||
assertEquals("expectedNumSpans", expectedNumSpans, seen);
|
||||
}
|
||||
|
||||
private IndexSearcher getSearcher() throws Exception {
|
||||
|
@ -446,27 +442,28 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
|
||||
int cnt = 0;
|
||||
|
||||
while (spans.next() == true) {
|
||||
if(VERBOSE)
|
||||
System.out.println("\nSpans Dump --");
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
if(VERBOSE) {
|
||||
System.out.println("payloads for span:" + payload.size());
|
||||
for (final byte [] bytes : payload) {
|
||||
System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
|
||||
+ new String(bytes, StandardCharsets.UTF_8));
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
if(VERBOSE)
|
||||
System.out.println("\nSpans Dump --");
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
if(VERBOSE) {
|
||||
System.out.println("payloads for span:" + payload.size());
|
||||
for (final byte [] bytes : payload) {
|
||||
System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
|
||||
+ new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
assertEquals("payload size", numPayloads[cnt], payload.size());
|
||||
} else { // no payload available
|
||||
assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0", numPayloads.length > 0 && numPayloads[cnt] > 0 );
|
||||
}
|
||||
|
||||
assertEquals(numPayloads[cnt],payload.size());
|
||||
} else {
|
||||
assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0",numPayloads.length > 0 && numPayloads[cnt] > 0 );
|
||||
cnt++;
|
||||
}
|
||||
cnt++;
|
||||
}
|
||||
|
||||
assertEquals(numSpans, cnt);
|
||||
assertEquals("expected numSpans", numSpans, cnt);
|
||||
}
|
||||
|
||||
final class PayloadAnalyzer extends Analyzer {
|
||||
|
|
|
@ -22,7 +22,6 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -201,117 +200,55 @@ public class TestSpans extends LuceneTestCase {
|
|||
makeSpanTermQuery("t3") },
|
||||
slop,
|
||||
ordered);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), snq);
|
||||
|
||||
assertTrue("first range", spans.next());
|
||||
assertEquals("first doc", 11, spans.doc());
|
||||
assertEquals("first start", 0, spans.start());
|
||||
assertEquals("first end", 4, spans.end());
|
||||
assertEquals("first doc", 11, spans.nextDoc());
|
||||
assertEquals("first start", 0, spans.nextStartPosition());
|
||||
assertEquals("first end", 4, spans.endPosition());
|
||||
|
||||
assertTrue("second range", spans.next());
|
||||
assertEquals("second doc", 11, spans.doc());
|
||||
assertEquals("second start", 2, spans.start());
|
||||
assertEquals("second end", 6, spans.end());
|
||||
assertEquals("second start", 2, spans.nextStartPosition());
|
||||
assertEquals("second end", 6, spans.endPosition());
|
||||
|
||||
assertFalse("third range", spans.next());
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
|
||||
public void testSpanNearUnOrdered() throws Exception {
|
||||
|
||||
//See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test
|
||||
SpanNearQuery snq;
|
||||
snq = new SpanNearQuery(
|
||||
SpanNearQuery senq;
|
||||
senq = new SpanNearQuery(
|
||||
new SpanQuery[] {
|
||||
makeSpanTermQuery("u1"),
|
||||
makeSpanTermQuery("u2") },
|
||||
0,
|
||||
false);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 4, spans.doc());
|
||||
assertEquals("start", 1, spans.start());
|
||||
assertEquals("end", 3, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 5, spans.doc());
|
||||
assertEquals("start", 2, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 8, spans.doc());
|
||||
assertEquals("start", 2, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 9, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 2, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 10, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 2, spans.end());
|
||||
assertTrue("Has next and it shouldn't: " + spans.doc(), spans.next() == false);
|
||||
Spans spans = MultiSpansWrapper.wrap(reader, senq);
|
||||
tstNextSpans(spans, 4, 1, 3);
|
||||
tstNextSpans(spans, 5, 2, 4);
|
||||
tstNextSpans(spans, 8, 2, 4);
|
||||
tstNextSpans(spans, 9, 0, 2);
|
||||
tstNextSpans(spans, 10, 0, 2);
|
||||
tstEndSpans(spans);
|
||||
|
||||
SpanNearQuery u1u2 = new SpanNearQuery(new SpanQuery[]{makeSpanTermQuery("u1"),
|
||||
makeSpanTermQuery("u2")}, 0, false);
|
||||
snq = new SpanNearQuery(
|
||||
senq = new SpanNearQuery(
|
||||
new SpanQuery[] {
|
||||
u1u2,
|
||||
makeSpanTermQuery("u2")
|
||||
},
|
||||
1,
|
||||
false);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 4, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 3, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
//unordered spans can be subsets
|
||||
assertEquals("doc", 4, spans.doc());
|
||||
assertEquals("start", 1, spans.start());
|
||||
assertEquals("end", 3, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 5, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 5, spans.doc());
|
||||
assertEquals("start", 2, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 8, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 8, spans.doc());
|
||||
assertEquals("start", 2, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 9, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 2, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 9, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 10, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 2, spans.end());
|
||||
|
||||
assertTrue("Has next and it shouldn't", spans.next() == false);
|
||||
spans = MultiSpansWrapper.wrap(reader, senq);
|
||||
tstNextSpans(spans, 4, 0, 3);
|
||||
tstNextSpans(spans, 4, 1, 3); // unordered spans can be subsets
|
||||
tstNextSpans(spans, 5, 0, 4);
|
||||
tstNextSpans(spans, 5, 2, 4);
|
||||
tstNextSpans(spans, 8, 0, 4);
|
||||
tstNextSpans(spans, 8, 2, 4);
|
||||
tstNextSpans(spans, 9, 0, 2);
|
||||
tstNextSpans(spans, 9, 0, 4);
|
||||
tstNextSpans(spans, 10, 0, 2);
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
|
||||
|
@ -321,21 +258,40 @@ public class TestSpans extends LuceneTestCase {
|
|||
for (int i = 0; i < terms.length; i++) {
|
||||
sqa[i] = makeSpanTermQuery(terms[i]);
|
||||
}
|
||||
return MultiSpansWrapper.wrap(searcher.getTopReaderContext(), new SpanOrQuery(sqa));
|
||||
return MultiSpansWrapper.wrap(searcher.getIndexReader(), new SpanOrQuery(sqa));
|
||||
}
|
||||
|
||||
  private void tstNextSpans(Spans spans, int doc, int start, int end)
  throws Exception {
    assertTrue("next", spans.next());
    assertEquals("doc", doc, spans.doc());
    assertEquals("start", start, spans.start());
    assertEquals("end", end, spans.end());
  public static void tstNextSpans(Spans spans, int doc, int start, int end) throws IOException {
    if (spans.docID() >= doc) {
      assertEquals("docId", doc, spans.docID());
    } else { // nextDoc needed before testing start/end
      if (spans.docID() >= 0) {
        assertEquals("nextStartPosition of previous doc", Spans.NO_MORE_POSITIONS, spans.nextStartPosition());
        assertEquals("endPosition of previous doc", Spans.NO_MORE_POSITIONS, spans.endPosition());
      }
      assertEquals("nextDoc", doc, spans.nextDoc());
      if (doc != Spans.NO_MORE_DOCS) {
        assertEquals("first startPosition", -1, spans.startPosition());
        assertEquals("first endPosition", -1, spans.endPosition());
      }
    }
    if (doc != Spans.NO_MORE_DOCS) {
      assertEquals("nextStartPosition", start, spans.nextStartPosition());
      assertEquals("startPosition", start, spans.startPosition());
      assertEquals("endPosition", end, spans.endPosition());
    }
  }

  public static void tstEndSpans(Spans spans) throws Exception {
    if (spans != null) { // null Spans is empty
      tstNextSpans(spans, Spans.NO_MORE_DOCS, -2, -2); // start and end positions will be ignored
    }
  }
public void testSpanOrEmpty() throws Exception {
|
||||
Spans spans = orSpans(new String[0]);
|
||||
assertFalse("empty next", spans.next());
|
||||
|
||||
tstEndSpans(spans);
|
||||
|
||||
SpanOrQuery a = new SpanOrQuery();
|
||||
SpanOrQuery b = new SpanOrQuery();
|
||||
assertTrue("empty should equal", a.equals(b));
|
||||
|
@ -344,24 +300,7 @@ public class TestSpans extends LuceneTestCase {
|
|||
public void testSpanOrSingle() throws Exception {
|
||||
Spans spans = orSpans(new String[] {"w5"});
|
||||
tstNextSpans(spans, 0, 4, 5);
|
||||
assertFalse("final next", spans.next());
|
||||
}
|
||||
|
||||
public void testSpanOrMovesForward() throws Exception {
|
||||
Spans spans = orSpans(new String[] {"w1", "xx"});
|
||||
|
||||
spans.next();
|
||||
int doc = spans.doc();
|
||||
assertEquals(0, doc);
|
||||
|
||||
spans.skipTo(0);
|
||||
doc = spans.doc();
|
||||
|
||||
// LUCENE-1583:
|
||||
// according to Spans, a skipTo to the same doc or less
|
||||
// should still call next() on the underlying Spans
|
||||
assertEquals(1, doc);
|
||||
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
public void testSpanOrDouble() throws Exception {
|
||||
|
@ -370,17 +309,15 @@ public class TestSpans extends LuceneTestCase {
|
|||
    tstNextSpans(spans, 2, 3, 4);
    tstNextSpans(spans, 3, 4, 5);
    tstNextSpans(spans, 7, 3, 4);
    assertFalse("final next", spans.next());
    tstEndSpans(spans);
  }

  public void testSpanOrDoubleSkip() throws Exception {
  public void testSpanOrDoubleAdvance() throws Exception {
    Spans spans = orSpans(new String[] {"w5", "yy"});
    assertTrue("initial skipTo", spans.skipTo(3));
    assertEquals("doc", 3, spans.doc());
    assertEquals("start", 4, spans.start());
    assertEquals("end", 5, spans.end());
    assertEquals("initial advance", 3, spans.advance(3));
    tstNextSpans(spans, 3, 4, 5);
    tstNextSpans(spans, 7, 3, 4);
    assertFalse("final next", spans.next());
    tstEndSpans(spans);
  }

  public void testSpanOrUnused() throws Exception {

@@ -389,7 +326,7 @@ public class TestSpans extends LuceneTestCase {
    tstNextSpans(spans, 2, 3, 4);
    tstNextSpans(spans, 3, 4, 5);
    tstNextSpans(spans, 7, 3, 4);
    assertFalse("final next", spans.next());
    tstEndSpans(spans);
  }

  public void testSpanOrTripleSameDoc() throws Exception {

@@ -400,7 +337,7 @@ public class TestSpans extends LuceneTestCase {
    tstNextSpans(spans, 11, 3, 4);
    tstNextSpans(spans, 11, 4, 5);
    tstNextSpans(spans, 11, 5, 6);
    assertFalse("final next", spans.next());
    tstEndSpans(spans);
  }

  public void testSpanScorerZeroSloppyFreq() throws Exception {

@@ -542,11 +479,15 @@ public class TestSpans extends LuceneTestCase {
    SpanTermQuery iq = new SpanTermQuery(new Term(field, include));
    SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude));
    SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post);
    Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
    Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), snq);

    int i = 0;
    while (spans.next()){
      i++;
    if (spans != null) {
      while (spans.nextDoc() != Spans.NO_MORE_DOCS){
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          i++;
        }
      }
    }
    return i;
  }

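The hunks above all make the same mechanical change: the old single loop over spans.next() becomes a document-level loop over nextDoc() with a nested position-level loop over nextStartPosition(), and start()/end() become startPosition()/endPosition(). A standalone sketch of the pattern is below; the class and method names are illustrative, not from the patch.

import java.io.IOException;

import org.apache.lucene.search.spans.Spans;

final class SpansMigrationSketch {
  /** Counts all (doc, start, end) matches of a Spans using the DISI-style API.
   *  Old style (pre LUCENE-6308): while (spans.next()) { count++; }
   *  New style: advance by document, then pull positions within that document.
   *  (Illustrative sketch of the migration shown in the hunks above.) */
  static int countMatches(Spans spans) throws IOException {
    int count = 0;
    if (spans != null) {                                  // a null Spans means "no matches"
      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {     // document-level iteration
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { // position-level iteration
          count++;                                        // startPosition()/endPosition() are valid here
        }
      }
    }
    return count;
  }
}
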
@@ -0,0 +1,187 @@
package org.apache.lucene.search.spans;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests Spans (v2)
 *
 */
public class TestSpansEnum extends LuceneTestCase {
  private static IndexSearcher searcher;
  private static IndexReader reader;
  private static Directory directory;

  static final class SimplePayloadFilter extends TokenFilter {
    int pos;
    final PayloadAttribute payloadAttr;
    final CharTermAttribute termAttr;

    public SimplePayloadFilter(TokenStream input) {
      super(input);
      pos = 0;
      payloadAttr = input.addAttribute(PayloadAttribute.class);
      termAttr = input.addAttribute(CharTermAttribute.class);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
        payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes(StandardCharsets.UTF_8)));
        pos++;
        return true;
      } else {
        return false;
      }
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      pos = 0;
    }
  }

  static Analyzer simplePayloadAnalyzer;
  @BeforeClass
  public static void beforeClass() throws Exception {
    simplePayloadAnalyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
      }
    };

    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
        newIndexWriterConfig(simplePayloadAnalyzer)
            .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
    //writer.infoStream = System.out;
    for (int i = 0; i < 10; i++) {
      Document doc = new Document();
      doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
      writer.addDocument(doc);
    }
    for (int i = 100; i < 110; i++) {
      Document doc = new Document(); // doc id 10-19 have 100-109
      doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
      writer.addDocument(doc);
    }
    reader = writer.getReader();
    searcher = newSearcher(reader);
    writer.close();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    reader.close();
    directory.close();
    searcher = null;
    reader = null;
    directory = null;
    simplePayloadAnalyzer = null;
  }

  private void checkHits(Query query, int[] results) throws IOException {
    CheckHits.checkHits(random(), query, "field", searcher, results);
  }

  SpanTermQuery spanTQ(String term) {
    return new SpanTermQuery(new Term("field", term));
  }

  @Test
  public void testSpansEnumOr1() throws Exception {
    SpanTermQuery t1 = spanTQ("one");
    SpanTermQuery t2 = spanTQ("two");
    SpanOrQuery soq = new SpanOrQuery(t1, t2);
    checkHits(soq, new int[] {1, 2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19});
  }

  @Test
  public void testSpansEnumOr2() throws Exception {
    SpanTermQuery t1 = spanTQ("one");
    SpanTermQuery t11 = spanTQ("eleven");
    SpanOrQuery soq = new SpanOrQuery(t1, t11);
    checkHits(soq, new int[] {1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19});
  }

  @Test
  public void testSpansEnumOr3() throws Exception {
    SpanTermQuery t12 = spanTQ("twelve");
    SpanTermQuery t11 = spanTQ("eleven");
    SpanOrQuery soq = new SpanOrQuery(t12, t11);
    checkHits(soq, new int[] {});
  }

  @Test
  public void testSpansEnumOrNot1() throws Exception {
    SpanTermQuery t1 = spanTQ("one");
    SpanTermQuery t2 = spanTQ("two");
    SpanOrQuery soq = new SpanOrQuery(t1, t2);
    SpanNotQuery snq = new SpanNotQuery(soq, t1);
    checkHits(snq, new int[] {2,12});
  }

  @Test
  public void testSpansEnumNotBeforeAfter1() throws Exception {
    SpanTermQuery t1 = spanTQ("one");
    SpanTermQuery t100 = spanTQ("hundred");
    SpanNotQuery snq = new SpanNotQuery(t100, t1, 0, 0);
    checkHits(snq, new int[] {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}); // include all "one hundred ..."
  }

  @Test
  public void testSpansEnumNotBeforeAfter2() throws Exception {
    SpanTermQuery t1 = spanTQ("one");
    SpanTermQuery t100 = spanTQ("hundred");
    SpanNotQuery snq = new SpanNotQuery(t100, t1, 1, 0);
    checkHits(snq, new int[] {}); // exclude all "one hundred ..."
  }

  @Test
  public void testSpansEnumNotBeforeAfter3() throws Exception {
    SpanTermQuery t1 = spanTQ("one");
    SpanTermQuery t100 = spanTQ("hundred");
    SpanNotQuery snq = new SpanNotQuery(t100, t1, 0, 1);
    checkHits(snq, new int[] {10, 12, 13, 14, 15, 16, 17, 18, 19}); // exclude "one hundred one"
  }
}

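The expected doc id arrays in TestSpansEnum follow from the corpus built in beforeClass(): docs 0-9 index English.intToEnglish(0..9) and docs 10-19 index English.intToEnglish(100..109), so "one" matches doc 1 plus every "one hundred ..." doc. Below is a quick way to sanity-check such an array, assuming the same corpus layout; the class name is illustrative only.

import org.apache.lucene.util.English;

// Prints which doc ids of the corpus above contain a given token,
// assuming the same layout as beforeClass(): docs 0-9 hold 0..9, docs 10-19 hold 100..109.
// (Illustrative sketch, not part of the patch.)
public final class ExpectedHitsSketch {
  public static void main(String[] args) {
    String token = args.length > 0 ? args[0] : "one";
    for (int docId = 0; docId < 20; docId++) {
      int number = docId < 10 ? docId : 100 + (docId - 10);
      String text = English.intToEnglish(number).toLowerCase(java.util.Locale.ROOT);
      if (text.contains(token)) {   // crude containment check, good enough for whole words here
        System.out.println("doc " + docId + ": \"" + text.trim() + "\"");
      }
    }
  }
}
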
@@ -308,10 +308,11 @@ public class WeightedSpanTermExtractor {
      final Spans spans = q.getSpans(context, acceptDocs, termContexts);

      // collect span positions
      while (spans.next()) {
        spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
        }
      }

    }

    if (spanPositions.size() == 0) {

@@ -681,7 +681,7 @@ public class TestMultiTermHighlighting extends LuceneTestCase {
      }
    };
    SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
    Query query = new SpanNearQuery(new SpanQuery[] { childQuery }, 0, true);
    Query query = new SpanNearQuery(new SpanQuery[] { childQuery, childQuery }, 0, false);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);