LUCENE-6371: Add collection API to Spans, remove payload methods

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680205 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Alan Woodward 2015-05-19 09:02:38 +00:00
parent 88941936ca
commit a0561676a0
38 changed files with 748 additions and 626 deletions

View File

@ -189,6 +189,12 @@ API Changes
* LUCENE-6445: Two new methods in Highlighter's TokenSources; the existing
methods are now marked deprecated. (David Smiley)
* LUCENE-6371: Payload collection from Spans is moved to a more generic
SpanCollector framework. Spans no longer implements .hasPayload() and
.getPayload() methods, and instead exposes a collect() method that allows
the collection of arbitrary postings information. (Alan Woodward, David
Smiley, Paul Elschot)
Other
* LUCENE-6413: Test runner should report the number of suites completed/

View File

@ -17,11 +17,6 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
@ -29,8 +24,6 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.NearSpansOrdered;
import org.apache.lucene.search.spans.NearSpansUnordered;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
@ -40,6 +33,11 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.Objects;
/**
* This class is very similar to
* {@link org.apache.lucene.search.spans.SpanNearQuery} except that it factors
@ -55,8 +53,10 @@ import org.apache.lucene.util.ToStringUtils;
* @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
*/
public class PayloadNearQuery extends SpanNearQuery {
protected String fieldName;
protected PayloadFunction function;
protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
public PayloadNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
this(clauses, slop, inOrder, new AveragePayloadFunction());
@ -129,17 +129,18 @@ public class PayloadNearQuery extends SpanNearQuery {
}
public class PayloadNearSpanWeight extends SpanWeight {
public PayloadNearSpanWeight(SpanQuery query, IndexSearcher searcher)
throws IOException {
super(query, searcher);
super(query, searcher, payloadCollector);
}
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
Spans spans = query.getSpans(context, acceptDocs, termContexts);
Spans spans = query.getSpans(context, acceptDocs, termContexts, payloadCollector);
return (spans == null)
? null
: new PayloadNearSpanScorer(spans, this, similarity, similarity.simScorer(stats, context));
: new PayloadNearSpanScorer(spans, this, similarity.simScorer(stats, context));
}
@Override
@ -176,31 +177,11 @@ public class PayloadNearQuery extends SpanNearQuery {
protected float payloadScore;
private int payloadsSeen;
protected PayloadNearSpanScorer(Spans spans, SpanWeight weight,
Similarity similarity, Similarity.SimScorer docScorer) throws IOException {
protected PayloadNearSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
this.spans = spans;
}
// Get the payloads associated with all underlying subspans
public void getPayloads(Spans[] subSpans) throws IOException {
for (int i = 0; i < subSpans.length; i++) {
if (subSpans[i] instanceof NearSpansOrdered) {
if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(),
subSpans[i].startPosition(), subSpans[i].endPosition());
}
getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
} else if (subSpans[i] instanceof NearSpansUnordered) {
if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(),
subSpans[i].startPosition(), subSpans[i].endPosition());
}
getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
}
}
}
// TODO change the whole spans api to use bytesRef, or nuke spans
BytesRef scratch = new BytesRef();
@ -237,9 +218,9 @@ public class PayloadNearQuery extends SpanNearQuery {
do {
int matchLength = spans.endPosition() - startPos;
freq += docScorer.computeSlopFactor(matchLength);
Spans[] spansArr = new Spans[1];
spansArr[0] = spans;
getPayloads(spansArr);
payloadCollector.reset();
spans.collect(payloadCollector);
processPayloads(payloadCollector.getPayloads(), startPos, spans.endPosition());
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
}

View File

@ -0,0 +1,103 @@
package org.apache.lucene.search.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.BufferedSpanCollector;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
/**
 * SpanCollector implementation that collects payloads from a {@link Spans}
 *
 * <p>Payloads are accumulated via {@link #collectLeaf(PostingsEnum, Term)} until
 * {@link #reset()} is called. Two-phase (buffered) collection for eager Spans
 * implementations is provided through {@link #buffer()} / {@link #bufferedCollector()}.
 */
public class PayloadSpanCollector implements SpanCollector {

  // payloads gathered since the last reset(); each entry is a private copy
  // of the postings' payload bytes
  private final Collection<byte[]> payloads = new ArrayList<>();

  // lazily created buffering helper, reused across calls to buffer()
  BufferedPayloadCollector bufferedCollector;

  /** Returns the payloads collected so far (live collection, cleared by {@link #reset()}) */
  public Collection<byte[]> getPayloads() {
    return payloads;
  }

  @Override
  public void reset() {
    payloads.clear();
  }

  @Override
  public int requiredPostings() {
    // payloads can only be collected if they were indexed on the postings
    return PostingsEnum.PAYLOADS;
  }

  @Override
  public void collectLeaf(PostingsEnum postings, Term term) throws IOException {
    BytesRef payload = postings.getPayload();
    if (payload == null)
      return;
    // deep-copy the bytes: the BytesRef returned by getPayload() may be
    // reused when the enum advances
    final byte[] bytes = new byte[payload.length];
    System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length);
    payloads.add(bytes);
  }

  @Override
  public BufferedSpanCollector buffer() {
    if (bufferedCollector == null)
      bufferedCollector = new BufferedPayloadCollector();
    bufferedCollector.reset();
    return bufferedCollector;
  }

  @Override
  public SpanCollector bufferedCollector() {
    // NOTE(review): unlike buffer(), this does not reset the buffer —
    // presumably callers only need the per-candidate scratch collector here;
    // confirm against the SpanCollector interface contract
    if (bufferedCollector == null)
      bufferedCollector = new BufferedPayloadCollector();
    return bufferedCollector.candidateCollector;
  }

  /**
   * Buffers payloads from candidate Spans; accepted candidates are replayed
   * into the enclosing collector's payload list.
   */
  class BufferedPayloadCollector implements BufferedSpanCollector {

    // payloads of all accepted candidates since the last reset()
    final Collection<byte[]> buffer = new ArrayList<>();

    // scratch collector that gathers each candidate's payloads in isolation
    PayloadSpanCollector candidateCollector = new PayloadSpanCollector();

    void reset() {
      buffer.clear();
    }

    @Override
    public void collectCandidate(Spans spans) throws IOException {
      candidateCollector.reset();
      spans.collect(candidateCollector);
    }

    @Override
    public void accept() {
      // promote the last candidate's payloads into the accepted buffer
      buffer.addAll(candidateCollector.payloads);
    }

    @Override
    public void replay() {
      // flush everything accepted so far into the parent collector
      payloads.addAll(buffer);
    }

  }

}

View File

@ -17,15 +17,6 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
@ -46,6 +37,15 @@ import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
/**
* Experimental class to get set of payloads for most standard Lucene queries.
* Operates like Highlighter - IndexReader should only contain doc of interest,
@ -187,17 +187,16 @@ public class PayloadSpanUtil {
for (Term term : terms) {
termContexts.put(term, TermContext.build(context, term));
}
PayloadSpanCollector collector = new PayloadSpanCollector();
for (LeafReaderContext leafReaderContext : context.leaves()) {
final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts);
final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts, collector);
if (spans != null) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
for (byte [] bytes : payload) {
payloads.add(bytes);
}
}
collector.reset();
spans.collect(collector);
payloads.addAll(collector.getPayloads());
}
}
}

View File

@ -17,9 +17,6 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
@ -28,6 +25,8 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.BufferedSpanCollector;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
@ -37,6 +36,9 @@ import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Objects;
/**
* This class is very similar to
* {@link org.apache.lucene.search.spans.SpanTermQuery} except that it factors
@ -67,19 +69,52 @@ public class PayloadTermQuery extends SpanTermQuery {
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new PayloadTermWeight(this, searcher);
return new PayloadTermWeight(this, searcher, new PayloadTermCollector());
}
/**
 * SpanCollector that records the payload of the most recently collected
 * position. The enclosing query collects and processes one position at a
 * time, so a single {@link BytesRef} reference is sufficient.
 */
protected class PayloadTermCollector implements SpanCollector {

  // payload seen at the last collected position, or null if none
  BytesRef payload;

  @Override
  public void reset() {
    payload = null;
  }

  @Override
  public int requiredPostings() {
    // payload scoring requires payloads on the postings
    return PostingsEnum.PAYLOADS;
  }

  @Override
  public void collectLeaf(PostingsEnum postings, Term term) throws IOException {
    // keep a reference only — it is consumed by processPayload() before the
    // enum is advanced to the next position
    payload = postings.getPayload();
  }

  @Override
  public BufferedSpanCollector buffer() {
    // buffered collection is only needed by eager composite Spans;
    // a single-term query never requires it
    throw new UnsupportedOperationException();
  }

  @Override
  public SpanCollector bufferedCollector() {
    throw new UnsupportedOperationException();
  }

}
protected class PayloadTermWeight extends SpanWeight {
public PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher)
final PayloadTermCollector payloadCollector;
public PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher, PayloadTermCollector collector)
throws IOException {
super(query, searcher);
super(query, searcher, collector);
this.payloadCollector = collector;
}
@Override
public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts);
TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts, payloadCollector);
return (spans == null)
? null
: new PayloadTermSpanScorer(spans, this, similarity.simScorer(stats, context));
@ -109,29 +144,22 @@ public class PayloadTermQuery extends SpanTermQuery {
freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
processPayload(similarity);
payloadCollector.reset();
spans.collect(payloadCollector);
processPayload();
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
}
protected void processPayload(Similarity similarity) throws IOException {
if (spans.isPayloadAvailable()) {
final PostingsEnum postings = termSpans.getPostings();
payload = postings.getPayload();
if (payload != null) {
payloadScore = function.currentScore(docID(), term.field(),
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload));
} else {
payloadScore = function.currentScore(docID(), term.field(),
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
}
payloadsSeen++;
protected void processPayload() throws IOException {
float payloadFactor = payloadCollector.payload == null ? 1F :
docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payloadCollector.payload);
payloadScore = function.currentScore(docID(), term.field(), spans.startPosition(), spans.endPosition(),
payloadsSeen, payloadScore, payloadFactor);
payloadsSeen++;
} else {
// zero out the payload?
}
}
/**

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.spans;
package org.apache.lucene.search.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -16,7 +16,12 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
@ -30,10 +35,12 @@ import java.util.Objects;
* the given position.
*/
public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {
protected final Collection<byte[]> payloadToMatch;
protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
/**
* @param match The underlying {@link SpanQuery} to check
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
* @param payloadToMatch The {@link java.util.Collection} of payloads to match
*/
public SpanNearPayloadCheckQuery(SpanNearQuery match, Collection<byte[]> payloadToMatch) {
@ -41,35 +48,41 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {
this.payloadToMatch = Objects.requireNonNull(payloadToMatch);
}
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new SpanWeight(this, searcher, payloadCollector);
}
@Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
boolean result = spans.isPayloadAvailable();
if (result == true) {
Collection<byte[]> candidate = spans.getPayload();
if (candidate.size() == payloadToMatch.size()) {
//TODO: check the byte arrays are the same
//hmm, can't rely on order here
int matches = 0;
for (byte[] candBytes : candidate) {
//Unfortunately, we can't rely on order, so we need to compare all
for (byte[] payBytes : payloadToMatch) {
if (Arrays.equals(candBytes, payBytes) == true) {
matches++;
break;
}
payloadCollector.reset();
spans.collect(payloadCollector);
Collection<byte[]> candidate = payloadCollector.getPayloads();
if (candidate.size() == payloadToMatch.size()) {
//TODO: check the byte arrays are the same
//hmm, can't rely on order here
int matches = 0;
for (byte[] candBytes : candidate) {
//Unfortunately, we can't rely on order, so we need to compare all
for (byte[] payBytes : payloadToMatch) {
if (Arrays.equals(candBytes, payBytes) == true) {
matches++;
break;
}
}
if (matches == payloadToMatch.size()){
//we've verified all the bytes
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
}
if (matches == payloadToMatch.size()){
//we've verified all the bytes
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
} else {
return AcceptStatus.NO;
}
return AcceptStatus.NO;
}
@Override

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.spans;
package org.apache.lucene.search.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -16,7 +16,14 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.payloads.PayloadSpanCollector;
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
@ -30,11 +37,13 @@ import java.util.Iterator;
* the given position.
* <p>
* Do not use this with a SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}.
* Instead, use {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
* Instead, use {@link org.apache.lucene.search.payloads.SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
* aren't ordered by {@link org.apache.lucene.search.spans.SpanNearQuery}.
*/
public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {
protected final Collection<byte[]> payloadToMatch;
protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
/**
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
@ -48,29 +57,35 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {
this.payloadToMatch = payloadToMatch;
}
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new SpanWeight(this, searcher, payloadCollector);
}
@Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
boolean result = spans.isPayloadAvailable();
if (result == true){
Collection<byte[]> candidate = spans.getPayload();
if (candidate.size() == payloadToMatch.size()){
//TODO: check the byte arrays are the same
Iterator<byte[]> toMatchIter = payloadToMatch.iterator();
//check each of the byte arrays, in order
//hmm, can't rely on order here
for (byte[] candBytes : candidate) {
//if one is a mismatch, then return false
if (Arrays.equals(candBytes, toMatchIter.next()) == false){
return AcceptStatus.NO;
}
payloadCollector.reset();
spans.collect(payloadCollector);
Collection<byte[]> candidate = payloadCollector.getPayloads();
if (candidate.size() == payloadToMatch.size()){
//TODO: check the byte arrays are the same
Iterator<byte[]> toMatchIter = payloadToMatch.iterator();
//check each of the byte arrays, in order
//hmm, can't rely on order here
for (byte[] candBytes : candidate) {
//if one is a mismatch, then return false
if (Arrays.equals(candBytes, toMatchIter.next()) == false){
return AcceptStatus.NO;
}
//we've verified all the bytes
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
//we've verified all the bytes
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
return AcceptStatus.YES;
}
@Override

View File

@ -0,0 +1,67 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
/**
 * Defines span collection for eager Span implementations, such as
 * {@link org.apache.lucene.search.spans.NearSpansOrdered}
 *
 * <p>Candidates are collected first, then either confirmed with {@link #accept()}
 * or discarded; confirmed information is pushed to the parent collector on
 * {@link #replay()}.
 *
 * @lucene.experimental
 */
public interface BufferedSpanCollector {

  /**
   * Collect information from a possible candidate
   * @param spans the candidate Spans
   * @throws IOException on error
   */
  public void collectCandidate(Spans spans) throws IOException;

  /**
   * Confirm that the last candidate Spans has been accepted by the parent algorithm
   */
  public void accept();

  /**
   * Replay buffered information back to the parent SpanCollector
   */
  public void replay();

  /**
   * A default No-op BufferedSpanCollector
   */
  public static final BufferedSpanCollector NO_OP = new BufferedSpanCollector() {

    @Override
    public void collectCandidate(Spans spans) throws IOException {
      // intentionally a no-op: nothing is buffered
    }

    @Override
    public void accept() {
      // intentionally a no-op
    }

    @Override
    public void replay() {
      // intentionally a no-op
    }

  };

}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Objects;
abstract class ContainSpans extends ConjunctionSpans {
@ -49,12 +48,8 @@ abstract class ContainSpans extends ConjunctionSpans {
}
@Override
public boolean isPayloadAvailable() throws IOException {
return sourceSpans.isPayloadAvailable();
public void collect(SpanCollector collector) throws IOException {
sourceSpans.collect(collector);
}
@Override
public Collection<byte[]> getPayload() throws IOException {
return sourceSpans.getPayload();
}
}

View File

@ -17,21 +17,20 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/**
* <p>Wrapper to allow {@link SpanQuery} objects participate in composite
* single-field SpanQueries by 'lying' about their search field. That is,
@ -97,8 +96,8 @@ public class FieldMaskingSpanQuery extends SpanQuery {
// ...this is done to be more consistent with things like SpanFirstQuery
@Override
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
return maskedQuery.getSpans(context, acceptDocs, termContexts);
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
return maskedQuery.getSpans(context, acceptDocs, termContexts, collector);
}
@Override

View File

@ -17,12 +17,11 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Objects;
import org.apache.lucene.search.TwoPhaseIterator;
import java.io.IOException;
import java.util.Objects;
/**
* A {@link Spans} implementation wrapping another spans instance,
* allowing to filter spans matches easily by implementing {@link #accept}
@ -110,17 +109,12 @@ public abstract class FilterSpans extends Spans {
return atFirstInCurrentDoc ? -1
: (startPos != NO_MORE_POSITIONS) ? in.endPosition() : NO_MORE_POSITIONS;
}
@Override
public final Collection<byte[]> getPayload() throws IOException {
return in.getPayload();
}
@Override
public final boolean isPayloadAvailable() throws IOException {
return in.isPayloadAvailable();
public void collect(SpanCollector collector) throws IOException {
in.collect(collector);
}
@Override
public final long cost() {
return in.cost();

View File

@ -19,12 +19,9 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.List;
import java.util.Collection;
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
* where the subspans do not overlap and have a maximum slop between them,
* and that does not need to collect payloads.
* To also collect payloads, see {@link NearSpansPayloadOrdered}.
* where the subspans do not overlap and have a maximum slop between them.
* <p>
* The formed spans only contains minimum slop matches.<br>
* The matching slop is computed from the distance(s) between
@ -41,6 +38,9 @@ import java.util.Collection;
* <pre>t1 t2 .. t3 </pre>
* <pre> t1 .. t2 t3</pre>
*
* Because the algorithm used to minimize the size of a match consumes
* child Spans eagerly, this uses a BufferedSpanCollector to collect
* information from subspans.
*
* Expert:
* Only public for subclassing. Most implementations should not need this class
@ -51,9 +51,13 @@ public class NearSpansOrdered extends NearSpans {
protected int matchStart = -1;
protected int matchEnd = -1;
public NearSpansOrdered(SpanNearQuery query, List<Spans> subSpans) throws IOException {
protected final SpanCollector collector;
protected BufferedSpanCollector buffer;
public NearSpansOrdered(SpanNearQuery query, List<Spans> subSpans, SpanCollector collector) throws IOException {
super(query, subSpans);
this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1
this.collector = collector;
}
@Override
@ -140,10 +144,15 @@ public class NearSpansOrdered extends NearSpans {
matchStart = lastSubSpans.startPosition();
matchEnd = lastSubSpans.endPosition();
buffer = collector.buffer();
buffer.collectCandidate(subSpans[subSpans.length - 1]);
buffer.accept();
int matchSlop = 0;
int lastStart = matchStart;
for (int i = subSpans.length - 2; i >= 0; i--) {
Spans prevSpans = subSpans[i];
buffer.collectCandidate(prevSpans);
int prevStart = prevSpans.startPosition();
int prevEnd = prevSpans.endPosition();
@ -160,8 +169,11 @@ public class NearSpansOrdered extends NearSpans {
// prevSpans still before (lastStart, lastEnd)
prevStart = ppStart;
prevEnd = ppEnd;
buffer.collectCandidate(prevSpans);
}
buffer.accept();
assert prevStart <= matchStart;
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
matchSlop += (matchStart - prevEnd);
@ -190,13 +202,10 @@ public class NearSpansOrdered extends NearSpans {
}
@Override
public Collection<byte[]> getPayload() throws IOException {
return null;
}
@Override
public boolean isPayloadAvailable() {
return false;
public void collect(SpanCollector collector) {
assert collector == this.collector
: "You must collect using the same SpanCollector as was passed to the NearSpans constructor";
buffer.replay();
}
@Override

View File

@ -1,144 +0,0 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
import java.util.Set;
/** A {@link NearSpansOrdered} that allows collecting payloads.
* Expert:
* Only public for subclassing. Most implementations should not need this class
*/
public class NearSpansPayloadOrdered extends NearSpansOrdered {
private List<byte[]> matchPayload;
private Set<byte[]> possibleMatchPayloads;
public NearSpansPayloadOrdered(SpanNearQuery query, List<Spans> subSpans)
throws IOException {
super(query, subSpans);
this.matchPayload = new LinkedList<>();
this.possibleMatchPayloads = new HashSet<>();
}
/** The subSpans are ordered in the same doc, so there is a possible match.
* Compute the slop while making the match as short as possible by using nextStartPosition
* on all subSpans, except the last one, in reverse order.
* Also collect the payloads.
*/
protected boolean shrinkToAfterShortestMatch() throws IOException {
Spans lastSubSpans = subSpans[subSpans.length - 1];
matchStart = lastSubSpans.startPosition();
matchEnd = lastSubSpans.endPosition();
matchPayload.clear();
possibleMatchPayloads.clear();
if (lastSubSpans.isPayloadAvailable()) {
possibleMatchPayloads.addAll(lastSubSpans.getPayload());
}
Collection<byte[]> possiblePayload = null;
int matchSlop = 0;
int lastStart = matchStart;
for (int i = subSpans.length - 2; i >= 0; i--) {
Spans prevSpans = subSpans[i];
if (prevSpans.isPayloadAvailable()) {
Collection<byte[]> payload = prevSpans.getPayload();
possiblePayload = new ArrayList<>(payload.size());
possiblePayload.addAll(payload);
}
int prevStart = prevSpans.startPosition();
int prevEnd = prevSpans.endPosition();
while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
oneExhaustedInCurrentDoc = true;
break; // Check remaining subSpans for match.
}
int ppStart = prevSpans.startPosition();
int ppEnd = prevSpans.endPosition();
if (ppEnd > lastStart) { // if overlapping spans
break; // Check remaining subSpans.
}
// prevSpans still before (lastStart, lastEnd)
prevStart = ppStart;
prevEnd = ppEnd;
if (prevSpans.isPayloadAvailable()) {
Collection<byte[]> payload = prevSpans.getPayload();
if (possiblePayload == null) {
possiblePayload = new ArrayList<>(payload.size());
} else {
possiblePayload.clear();
}
possiblePayload.addAll(payload);
}
}
if (possiblePayload != null) {
possibleMatchPayloads.addAll(possiblePayload);
}
assert prevStart <= matchStart;
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
matchSlop += (matchStart - prevEnd);
}
/* Do not break on (matchSlop > allowedSlop) here to make sure
* that on return the first subSpans has nextStartPosition called.
*/
matchStart = prevStart;
lastStart = prevStart;
}
boolean match = matchSlop <= allowedSlop;
if (match && possibleMatchPayloads.size() > 0) {
matchPayload.addAll(possibleMatchPayloads);
}
return match; // ordered and allowed slop
}
// TODO: Remove warning after API has been finalized
// TODO: Would be nice to be able to lazy load payloads
/**
 * Returns the payloads accumulated for the current ordered match.
 * Only meaningful after a successful match; the returned collection is
 * empty when none of the matched subspans carried payload data.
 *
 * @return the collected payload byte arrays for the current match
 */
@Override
public Collection<byte[]> getPayload() throws IOException {
  return matchPayload;
}
/** Reports whether the current match accumulated any payload data. */
@Override
public boolean isPayloadAvailable() {
  return matchPayload.size() > 0;
}
/** Renders the query plus the current document and position range. */
@Override
public String toString() {
  StringBuilder description = new StringBuilder("NearSpansPayloadOrdered(");
  description.append(query.toString()).append(")@").append(docID());
  description.append(": ").append(startPosition()).append(" - ").append(endPosition());
  return description.toString();
}
}

View File

@ -22,10 +22,7 @@ import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
/**
* Similar to {@link NearSpansOrdered}, but for the unordered case.
@ -118,13 +115,8 @@ public class NearSpansUnordered extends NearSpans {
}
@Override
public Collection<byte[]> getPayload() throws IOException {
return in.getPayload();
}
@Override
public boolean isPayloadAvailable() throws IOException {
return in.isPayloadAvailable();
public void collect(SpanCollector collector) throws IOException {
in.collect(collector);
}
@Override
@ -249,31 +241,11 @@ public class NearSpansUnordered extends NearSpans {
: maxEndPositionCell.endPosition();
}
/**
* WARNING: The List is not necessarily in order of the positions.
* @return Collection of <code>byte[]</code> payloads
* @throws IOException if there is a low-level I/O error
*/
@Override
public Collection<byte[]> getPayload() throws IOException {
Set<byte[]> matchPayload = new HashSet<>();
public void collect(SpanCollector collector) throws IOException {
for (SpansCell cell : subSpanCells) {
if (cell.isPayloadAvailable()) {
matchPayload.addAll(cell.getPayload());
}
cell.collect(collector);
}
return matchPayload;
}
@Override
public boolean isPayloadAvailable() throws IOException {
for (SpansCell cell : subSpanCells) {
if (cell.isPayloadAvailable()) {
return true;
}
}
return false;
}
@Override

View File

@ -0,0 +1,110 @@
package org.apache.lucene.search.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import java.io.IOException;
/**
* An interface defining the collection of postings information from the leaves
* of a {@link org.apache.lucene.search.spans.Spans}
*
* Typical use would be as follows:
* <pre>
* while (spans.nextStartPosition() != NO_MORE_POSITIONS) {
* spanCollector.reset();
* spans.collect(spanCollector);
* doSomethingWith(spanCollector);
* }
* </pre>
*
* @lucene.experimental
*/
public interface SpanCollector {

  /**
   * Called to indicate that the driving {@link org.apache.lucene.search.spans.Spans} has
   * been moved to a new position.  Implementations should discard any state
   * gathered for the previous position before new data is collected.
   */
  public void reset();

  /**
   * Returns an integer indicating what postings information should be retrieved
   *
   * See {@link org.apache.lucene.index.TermsEnum#postings(org.apache.lucene.util.Bits, org.apache.lucene.index.PostingsEnum, int)}
   *
   * @return the postings flag
   */
  public int requiredPostings();

  /**
   * Collect information from postings
   * @param postings a {@link PostingsEnum}
   * @param term the {@link Term} for this postings list
   * @throws IOException on error
   */
  public void collectLeaf(PostingsEnum postings, Term term) throws IOException;

  /**
   * Return a {@link BufferedSpanCollector} for use by eager spans implementations, such
   * as {@link NearSpansOrdered}.
   *
   * @return a BufferedSpanCollector
   */
  public BufferedSpanCollector buffer();

  /**
   * @return the SpanCollector used by the {@link org.apache.lucene.search.spans.BufferedSpanCollector}
   * returned from {@link #buffer()}.
   */
  public SpanCollector bufferedCollector();

  /**
   * A default No-op implementation of SpanCollector.  Collects nothing, and
   * requests only position data from the postings lists.
   */
  public static final SpanCollector NO_OP = new SpanCollector() {

    @Override
    public void reset() {
      // no per-position state to discard
    }

    @Override
    public int requiredPostings() {
      // positions only: payloads/offsets are never read by this collector
      return PostingsEnum.POSITIONS;
    }

    @Override
    public void collectLeaf(PostingsEnum postings, Term term) {
      // intentionally ignores all postings data
    }

    @Override
    public BufferedSpanCollector buffer() {
      return BufferedSpanCollector.NO_OP;
    }

    @Override
    public SpanCollector bufferedCollector() {
      // buffering is a no-op, so collection can go straight to this collector
      return this;
    }

  };

}

View File

@ -17,19 +17,19 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
abstract class SpanContainQuery extends SpanQuery implements Cloneable {
SpanQuery big;
SpanQuery little;
@ -55,12 +55,12 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable {
little.extractTerms(terms);
}
ArrayList<Spans> prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
Spans bigSpans = big.getSpans(context, acceptDocs, termContexts);
ArrayList<Spans> prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
Spans bigSpans = big.getSpans(context, acceptDocs, termContexts, collector);
if (bigSpans == null) {
return null;
}
Spans littleSpans = little.getSpans(context, acceptDocs, termContexts);
Spans littleSpans = little.getSpans(context, acceptDocs, termContexts, collector);
if (littleSpans == null) {
return null;
}

View File

@ -17,15 +17,15 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.ArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
/** Keep matches that contain another Spans. */
public class SpanContainingQuery extends SpanContainQuery {
/** Construct a SpanContainingQuery matching spans from <code>big</code>
@ -54,8 +54,8 @@ public class SpanContainingQuery extends SpanContainQuery {
* The payload is from the spans of <code>big</code>.
*/
@Override
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts);
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts, collector);
if (containerContained == null) {
return null;
}

View File

@ -17,22 +17,22 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.BooleanClause.Occur; // javadocs only
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/**
* Wraps any {@link MultiTermQuery} as a {@link SpanQuery},
* so it can be nested within other SpanQuery classes.
@ -99,7 +99,7 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
}
@Override
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
throw new UnsupportedOperationException("Query should have been rewritten");
}

View File

@ -17,15 +17,8 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
@ -33,6 +26,13 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/** Matches spans which are near one another. One can specify <i>slop</i>, the
* maximum number of intervening unmatched positions, as well as whether
* matches are required to be in-order.
@ -118,11 +118,17 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
}
@Override
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
Terms terms = context.reader().terms(field);
if (terms == null) {
return null; // field does not exist
}
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
SpanCollector subSpanCollector = inOrder ? collector.bufferedCollector() : collector;
for (SpanQuery seq : clauses) {
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts, subSpanCollector);
if (subSpan != null) {
subSpans.add(subSpan);
} else {
@ -130,15 +136,9 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
}
}
Terms terms = context.reader().terms(field);
if (terms == null) {
return null; // field does not exist
}
// all NearSpans require at least two subSpans
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
: collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans)
: new NearSpansOrdered(this, subSpans);
return (! inOrder) ? new NearSpansUnordered(this, subSpans) : new NearSpansOrdered(this, subSpans, collector);
}
@Override

View File

@ -105,13 +105,13 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
}
@Override
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
Spans includeSpans = include.getSpans(context, acceptDocs, termContexts, collector);
if (includeSpans == null) {
return null;
}
Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts, collector);
if (excludeSpans == null) {
return includeSpans;
}

View File

@ -17,26 +17,24 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.List;
import java.util.Collection;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.DisiPriorityQueue;
import org.apache.lucene.search.DisiWrapper;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.DisjunctionDISIApproximation;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/** Matches the union of its clauses.
@ -147,13 +145,13 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
@Override
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector)
throws IOException {
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
for (SpanQuery sq : clauses) {
Spans spans = sq.getSpans(context, acceptDocs, termContexts);
Spans spans = sq.getSpans(context, acceptDocs, termContexts, collector);
if (spans != null) {
subSpans.add(spans);
}
@ -306,17 +304,9 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
}
@Override
public Collection<byte[]> getPayload() throws IOException {
return topPositionSpans == null
? null
: topPositionSpans.isPayloadAvailable()
? new ArrayList<>(topPositionSpans.getPayload())
: null;
}
@Override
public boolean isPayloadAvailable() throws IOException {
return (topPositionSpans != null) && topPositionSpans.isPayloadAvailable();
public void collect(SpanCollector collector) throws IOException {
if (topPositionSpans != null)
topPositionSpans.collect(collector);
}
@Override

View File

@ -77,8 +77,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
@Override
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
Spans matchSpans = match.getSpans(context, acceptDocs, termContexts);
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
Spans matchSpans = match.getSpans(context, acceptDocs, termContexts, collector);
return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
@Override
protected AcceptStatus accept(Spans candidate) throws IOException {

View File

@ -17,10 +17,6 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@ -29,13 +25,17 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
/** Base class for span-based queries. */
public abstract class SpanQuery extends Query {
/** Expert: Returns the matches for this query in an index.
* Used internally to search for spans.
* This may return null to indicate that the SpanQuery has no results.
*/
public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException;
/**
* Extract terms from these spans.
@ -53,7 +53,7 @@ public abstract class SpanQuery extends Query {
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new SpanWeight(this, searcher);
return new SpanWeight(this, searcher, SpanCollector.NO_OP);
}
}

View File

@ -17,13 +17,8 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.Objects;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
@ -32,6 +27,11 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/** Matches spans containing a term.
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
*/
@ -83,7 +83,7 @@ public class SpanTermQuery extends SpanQuery {
}
@Override
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
TermContext termContext = termContexts.get(term);
final TermState state;
if (termContext == null) {
@ -115,7 +115,7 @@ public class SpanTermQuery extends SpanQuery {
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
termsEnum.seekExact(term.bytes(), state);
final PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.PAYLOADS);
final PostingsEnum postings = termsEnum.postings(acceptDocs, null, collector.requiredPostings());
return new TermSpans(postings, term);
}
}

View File

@ -17,12 +17,6 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -37,6 +31,12 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
* Expert-only. Public for use by other weight implementations
*/
@ -44,12 +44,14 @@ public class SpanWeight extends Weight {
protected final Similarity similarity;
protected final Map<Term,TermContext> termContexts;
protected final SpanQuery query;
protected final SpanCollector collector;
protected Similarity.SimWeight stats;
public SpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException {
public SpanWeight(SpanQuery query, IndexSearcher searcher, SpanCollector collector) throws IOException {
super(query);
this.similarity = searcher.getSimilarity();
this.query = query;
this.collector = collector;
termContexts = new HashMap<>();
TreeSet<Term> terms = new TreeSet<>();
@ -97,7 +99,7 @@ public class SpanWeight extends Weight {
if (terms != null && terms.hasPositions() == false) {
throw new IllegalStateException("field \"" + query.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + query + ")");
}
Spans spans = query.getSpans(context, acceptDocs, termContexts);
Spans spans = query.getSpans(context, acceptDocs, termContexts, collector);
return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context));
}

View File

@ -17,15 +17,15 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.ArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
/** Keep matches that are contained within another Spans. */
public class SpanWithinQuery extends SpanContainQuery {
/** Construct a SpanWithinQuery matching spans from <code>little</code>
@ -54,8 +54,8 @@ public class SpanWithinQuery extends SpanContainQuery {
* The payload is from the spans of <code>little</code>.
*/
@Override
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts);
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts, collector);
if (containerContained == null) {
return null;
}

View File

@ -17,12 +17,11 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
import java.io.IOException;
/** Iterates through combinations of start/end positions per-doc.
* Each start/end position represents a range of term positions within the current document.
* These are enumerated in order, by increasing document number, within that by
@ -51,33 +50,12 @@ public abstract class Spans extends DocIdSetIterator {
public abstract int endPosition();
/**
* Returns the payload data for the current start/end position.
* This is only valid after {@link #nextStartPosition()}
* returned an available start position.
* This method must not be called more than once after each call
* of {@link #nextStartPosition()}. However, most payloads are loaded lazily,
* so if the payload data for the current position is not needed,
* this method may not be called at all for performance reasons.
* <br>
* Note that the return type is a collection, thus the ordering should not be relied upon.
* <br>
* Collect data from the current Spans
* @param collector a SpanCollector
*
* @lucene.experimental
*
* @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
* @throws IOException if there is a low-level I/O error
*/
public abstract Collection<byte[]> getPayload() throws IOException;
/**
* Checks if a payload can be loaded at the current start/end position.
* <p>
* Payloads can only be loaded once per call to
* {@link #nextStartPosition()}.
*
* @return true if there is a payload available at this start/end position
* that can be loaded
*/
public abstract boolean isPayloadAvailable() throws IOException;
public abstract void collect(SpanCollector collector) throws IOException;
/**
* Optional method: Return a {@link TwoPhaseIterator} view of this

View File

@ -16,14 +16,11 @@ package org.apache.lucene.search.spans;
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collections;
import java.util.Collection;
import java.util.Objects;
/**
@ -109,6 +106,7 @@ public class TermSpans extends Spans {
return postings.cost();
}
/*
@Override
public Collection<byte[]> getPayload() throws IOException {
final BytesRef payload = postings.getPayload();
@ -127,6 +125,12 @@ public class TermSpans extends Spans {
public boolean isPayloadAvailable() throws IOException {
return readPayload == false && postings.getPayload() != null;
}
*/
@Override
public void collect(SpanCollector collector) throws IOException {
collector.collectLeaf(postings, term);
}
@Override
public String toString() {

View File

@ -17,34 +17,37 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockPayloadAnalyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.payloads.PayloadSpanCollector;
import org.apache.lucene.search.payloads.PayloadSpanUtil;
import org.apache.lucene.search.spans.MultiSpansWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
/**
* Term position unit test.
@ -53,7 +56,7 @@ import org.apache.lucene.util.BytesRef;
*/
public class TestPositionIncrement extends LuceneTestCase {
final static boolean VERBOSE = false;
final static boolean VERBOSE = true;
public void testSetPosition() throws Exception {
Analyzer analyzer = new Analyzer() {
@ -238,14 +241,17 @@ public class TestPositionIncrement extends LuceneTestCase {
if (VERBOSE) {
System.out.println("\ngetPayloadSpans test");
}
Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
PayloadSpanCollector collector = new PayloadSpanCollector();
Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector);
while (pspans.nextDoc() != Spans.NO_MORE_DOCS) {
while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if (VERBOSE) {
System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition()
+ " to " + pspans.endPosition());
}
Collection<byte[]> payloads = pspans.getPayload();
collector.reset();
pspans.collect(collector);
Collection<byte[]> payloads = collector.getPayloads();
sawZero |= pspans.startPosition() == 0;
for (byte[] bytes : payloads) {
count++;
@ -256,7 +262,7 @@ public class TestPositionIncrement extends LuceneTestCase {
}
}
assertTrue(sawZero);
assertEquals(5, count);
assertEquals(8, count);
// System.out.println("\ngetSpans test");
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
@ -282,7 +288,7 @@ public class TestPositionIncrement extends LuceneTestCase {
//System.out.println(s);
sawZero |= s.equals("pos: 0");
}
assertEquals(5, count);
assertEquals(8, count);
assertTrue(sawZero);
writer.close();
is.getIndexReader().close();

View File

@ -17,13 +17,6 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.SimplePayloadFilter;
@ -36,9 +29,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanNearPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanPositionRangeQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
@ -50,6 +41,13 @@ import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/** basic test of payload-spans */
public class TestPayloadBasics extends LuceneTestCase {
private static IndexSearcher searcher;

View File

@ -16,32 +16,23 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.payloads.PayloadHelper;
import org.apache.lucene.search.payloads.PayloadSpanUtil;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.MultiSpansWrapper;
@ -55,6 +46,13 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
public class TestPayloadSpans extends LuceneTestCase {
private IndexSearcher searcher;
private Similarity similarity = new DefaultSimilarity();
@ -74,14 +72,15 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq;
Spans spans;
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
spans = MultiSpansWrapper.wrap(indexReader, stq);
PayloadSpanCollector collector = new PayloadSpanCollector();
spans = MultiSpansWrapper.wrap(indexReader, stq, collector);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 100, 1, 1, 1);
checkSpans(spans, collector, 100, 1, 1, 1);
stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
spans = MultiSpansWrapper.wrap(indexReader, stq);
spans = MultiSpansWrapper.wrap(indexReader, stq, collector);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 100, 0, 0, 0);
checkSpans(spans, collector, 100, 0, 0, 0);
}
public void testSpanFirst() throws IOException {
@ -90,19 +89,20 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanFirstQuery sfq;
match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
sfq = new SpanFirstQuery(match, 2);
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq);
checkSpans(spans, 109, 1, 1, 1);
PayloadSpanCollector collector = new PayloadSpanCollector();
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq, collector);
checkSpans(spans, collector, 109, 1, 1, 1);
//Test more complicated subclause
SpanQuery[] clauses = new SpanQuery[2];
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
match = new SpanNearQuery(clauses, 0, true);
sfq = new SpanFirstQuery(match, 2);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, collector), collector, 100, 2, 1, 1);
match = new SpanNearQuery(clauses, 0, false);
sfq = new SpanFirstQuery(match, 2);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, collector), collector, 100, 2, 1, 1);
}
@ -124,9 +124,9 @@ public class TestPayloadSpans extends LuceneTestCase {
writer.addDocument(doc);
IndexReader reader = writer.getReader();
writer.close();
checkSpans(MultiSpansWrapper.wrap(reader, snq), 1,new int[]{2});
PayloadSpanCollector collector = new PayloadSpanCollector();
checkSpans(MultiSpansWrapper.wrap(reader, snq, collector), collector, 1, new int[]{2});
reader.close();
directory.close();
}
@ -135,8 +135,10 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq;
Spans spans;
IndexSearcher searcher = getSearcher();
PayloadSpanCollector collector = new PayloadSpanCollector();
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq, collector);
assertNull(spans);
SpanQuery[] clauses = new SpanQuery[3];
@ -145,9 +147,9 @@ public class TestPayloadSpans extends LuceneTestCase {
clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, collector);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 2, new int[]{3,3});
checkSpans(spans, collector, 2, new int[]{3,3});
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
@ -156,10 +158,10 @@ public class TestPayloadSpans extends LuceneTestCase {
spanNearQuery = new SpanNearQuery(clauses, 6, true);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, collector);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 1, new int[]{3});
checkSpans(spans, collector, 1, new int[]{3});
clauses = new SpanQuery[2];
@ -178,10 +180,9 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false);
// yy within 6 of xx within 6 of rr
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 2, new int[]{3,3});
checkSpans(spans, collector, 2, new int[]{3,3});
closeIndexReader.close();
directory.close();
}
@ -208,12 +209,13 @@ public class TestPayloadSpans extends LuceneTestCase {
clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
clauses3[1] = snq;
PayloadSpanCollector collector = new PayloadSpanCollector();
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 1, new int[]{3});
checkSpans(spans, collector, 1, new int[]{3});
closeIndexReader.close();
directory.close();
}
@ -248,9 +250,10 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
PayloadSpanCollector collector = new PayloadSpanCollector();
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 2, new int[]{8, 8});
checkSpans(spans, collector, 2, new int[]{8, 8});
closeIndexReader.close();
directory.close();
}
@ -272,15 +275,17 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
PayloadSpanCollector collector = new PayloadSpanCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
Collection<byte[]> payloads = spans.getPayload();
collector.reset();
spans.collect(collector);
Collection<byte[]> payloads = collector.getPayloads();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
}
@ -310,14 +315,17 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
PayloadSpanCollector collector = new PayloadSpanCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
Collection<byte[]> payloads = spans.getPayload();
collector.reset();
spans.collect(collector);
Collection<byte[]> payloads = collector.getPayloads();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
@ -348,14 +356,17 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
PayloadSpanCollector collector = new PayloadSpanCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
Collection<byte[]> payloads = spans.getPayload();
collector.reset();
spans.collect(collector);
Collection<byte[]> payloads = collector.getPayloads();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
@ -401,7 +412,7 @@ public class TestPayloadSpans extends LuceneTestCase {
directory.close();
}
private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads,
private void checkSpans(Spans spans, PayloadSpanCollector collector, int expectedNumSpans, int expectedNumPayloads,
int expectedPayloadLength, int expectedFirstByte) throws IOException {
assertTrue("spans is null and it shouldn't be", spans != null);
//each position match should have a span associated with it, since there is just one underlying term query, there should
@ -409,16 +420,16 @@ public class TestPayloadSpans extends LuceneTestCase {
int seen = 0;
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
assertEquals("isPayloadAvailable should return true/false as payloads are expected", expectedNumPayloads > 0, spans.isPayloadAvailable());
//See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
assertEquals("payload size", expectedNumPayloads, payload.size());
for (final byte [] thePayload : payload) {
assertEquals("payload length", expectedPayloadLength, thePayload.length);
assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
}
collector.reset();
spans.collect(collector);
Collection<byte[]> payload = collector.getPayloads();
assertEquals("payload size", expectedNumPayloads, payload.size());
for (final byte [] thePayload : payload) {
assertEquals("payload length", expectedPayloadLength, thePayload.length);
assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
}
seen++;
}
}
@ -446,26 +457,26 @@ public class TestPayloadSpans extends LuceneTestCase {
return searcher;
}
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
private void checkSpans(Spans spans, PayloadSpanCollector collector, int numSpans, int[] numPayloads) throws IOException {
int cnt = 0;
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if(VERBOSE)
System.out.println("\nSpans Dump --");
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
if(VERBOSE) {
System.out.println("payloads for span:" + payload.size());
for (final byte [] bytes : payload) {
System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
collector.reset();
spans.collect(collector);
Collection<byte[]> payload = collector.getPayloads();
if(VERBOSE) {
System.out.println("payloads for span:" + payload.size());
for (final byte [] bytes : payload) {
System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
+ new String(bytes, StandardCharsets.UTF_8));
}
}
assertEquals("payload size", numPayloads[cnt], payload.size());
} else { // no payload available
assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0", numPayloads.length > 0 && numPayloads[cnt] > 0 );
}
assertEquals("payload size", numPayloads[cnt], payload.size());
cnt++;
}
}

View File

@ -17,17 +17,16 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
/**
* Holds all implementations of classes in the o.a.l.s.spans package as a
* back-compatibility test. It does not run any tests per-se, however if
@ -65,22 +64,17 @@ final class JustCompileSearchSpans {
public int endPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public void collect(SpanCollector collector) throws IOException {
}
@Override
public int nextStartPosition() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public Collection<byte[]> getPayload() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public boolean isPayloadAvailable() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public long cost() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@ -100,7 +94,7 @@ final class JustCompileSearchSpans {
}
@Override
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) {
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@ -137,22 +131,17 @@ final class JustCompileSearchSpans {
public int endPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public void collect(SpanCollector collector) throws IOException {
}
@Override
public int nextStartPosition() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public Collection<byte[]> getPayload() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public boolean isPayloadAvailable() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public long cost() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);

View File

@ -17,11 +17,6 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -30,6 +25,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
/**
*
* A wrapper to perform span operations on a non-leaf reader context
@ -40,6 +40,10 @@ import org.apache.lucene.util.Bits;
public class MultiSpansWrapper {
public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException {
return wrap(reader, spanQuery, SpanCollector.NO_OP);
}
public static Spans wrap(IndexReader reader, SpanQuery spanQuery, SpanCollector collector) throws IOException {
LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing
LeafReaderContext lrContext = lr.getContext();
SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly
@ -50,7 +54,7 @@ public class MultiSpansWrapper {
TermContext termContext = TermContext.build(lrContext, term);
termContexts.put(term, termContext);
}
Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts);
Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts, collector);
return actSpans;
}
}

View File

@ -17,17 +17,6 @@ package org.apache.lucene.search.highlight;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.BinaryDocValues;
@ -59,6 +48,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@ -69,6 +59,17 @@ import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
@ -307,7 +308,7 @@ public class WeightedSpanTermExtractor {
termContexts.put(term, TermContext.build(context, term));
}
Bits acceptDocs = context.reader().getLiveDocs();
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
final Spans spans = q.getSpans(context, acceptDocs, termContexts, SpanCollector.NO_OP);
if (spans == null) {
return;
}

View File

@ -85,7 +85,7 @@ import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanPayloadCheckQuery;
import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;

View File

@ -17,10 +17,6 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -29,6 +25,10 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
/** Wraps a span query with asserts */
public class AssertingSpanQuery extends SpanQuery {
private final SpanQuery in;
@ -43,8 +43,8 @@ public class AssertingSpanQuery extends SpanQuery {
}
@Override
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
Spans spans = in.getSpans(context, acceptDocs, termContexts);
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
Spans spans = in.getSpans(context, acceptDocs, termContexts, collector);
if (spans == null) {
return null;
} else {

View File

@ -17,12 +17,10 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.spans.Spans;
import java.io.IOException;
/**
* Wraps a Spans with additional asserts
@ -125,19 +123,13 @@ class AssertingSpans extends Spans {
checkCurrentPositions();
return in.endPosition();
}
@Override
public Collection<byte[]> getPayload() throws IOException {
assert state == State.ITERATING : "getPayload() called in illegal state: " + state + ": " + in;
return in.getPayload();
public void collect(SpanCollector collector) throws IOException {
assert state == State.ITERATING : "collect() called in illegal state: " + state + ": " + in;
in.collect(collector);
}
@Override
public boolean isPayloadAvailable() throws IOException {
assert state == State.ITERATING : "isPayloadAvailable() called in illegal state: " + state + ": " + in;
return in.isPayloadAvailable();
}
@Override
public int docID() {
int doc = in.docID();

View File

@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanPayloadCheckQuery;
import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.HighlightParams;