mirror of https://github.com/apache/lucene.git
LUCENE-6371: Add collection API to Spans, remove payload methods
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680205 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
88941936ca
commit
a0561676a0
|
@ -189,6 +189,12 @@ API Changes
|
|||
* LUCENE-6445: Two new methods in Highlighter's TokenSources; the existing
|
||||
methods are now marked deprecated. (David Smiley)
|
||||
|
||||
* LUCENE-6371: Payload collection from Spans is moved to a more generic
|
||||
SpanCollector framework. Spans no longer implements .isPayloadAvailable() and
|
||||
.getPayload() methods, and instead exposes a collect() method that allows
|
||||
the collection of arbitrary postings information. (Alan Woodward, David
|
||||
Smiley, Paul Elschot)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-6413: Test runner should report the number of suites completed/
|
||||
|
|
|
@ -17,11 +17,6 @@ package org.apache.lucene.search.payloads;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -29,8 +24,6 @@ import org.apache.lucene.search.Scorer;
|
|||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.search.spans.NearSpansOrdered;
|
||||
import org.apache.lucene.search.spans.NearSpansUnordered;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanScorer;
|
||||
|
@ -40,6 +33,11 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* This class is very similar to
|
||||
* {@link org.apache.lucene.search.spans.SpanNearQuery} except that it factors
|
||||
|
@ -55,8 +53,10 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
* @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
|
||||
*/
|
||||
public class PayloadNearQuery extends SpanNearQuery {
|
||||
|
||||
protected String fieldName;
|
||||
protected PayloadFunction function;
|
||||
protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
|
||||
|
||||
public PayloadNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
|
||||
this(clauses, slop, inOrder, new AveragePayloadFunction());
|
||||
|
@ -129,17 +129,18 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
}
|
||||
|
||||
public class PayloadNearSpanWeight extends SpanWeight {
|
||||
|
||||
public PayloadNearSpanWeight(SpanQuery query, IndexSearcher searcher)
|
||||
throws IOException {
|
||||
super(query, searcher);
|
||||
super(query, searcher, payloadCollector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
Spans spans = query.getSpans(context, acceptDocs, termContexts);
|
||||
Spans spans = query.getSpans(context, acceptDocs, termContexts, payloadCollector);
|
||||
return (spans == null)
|
||||
? null
|
||||
: new PayloadNearSpanScorer(spans, this, similarity, similarity.simScorer(stats, context));
|
||||
: new PayloadNearSpanScorer(spans, this, similarity.simScorer(stats, context));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -176,31 +177,11 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
protected float payloadScore;
|
||||
private int payloadsSeen;
|
||||
|
||||
protected PayloadNearSpanScorer(Spans spans, SpanWeight weight,
|
||||
Similarity similarity, Similarity.SimScorer docScorer) throws IOException {
|
||||
protected PayloadNearSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
this.spans = spans;
|
||||
}
|
||||
|
||||
// Get the payloads associated with all underlying subspans
|
||||
public void getPayloads(Spans[] subSpans) throws IOException {
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
if (subSpans[i] instanceof NearSpansOrdered) {
|
||||
if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
|
||||
processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(),
|
||||
subSpans[i].startPosition(), subSpans[i].endPosition());
|
||||
}
|
||||
getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
|
||||
} else if (subSpans[i] instanceof NearSpansUnordered) {
|
||||
if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
|
||||
processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(),
|
||||
subSpans[i].startPosition(), subSpans[i].endPosition());
|
||||
}
|
||||
getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO change the whole spans api to use bytesRef, or nuke spans
|
||||
BytesRef scratch = new BytesRef();
|
||||
|
||||
|
@ -237,9 +218,9 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
do {
|
||||
int matchLength = spans.endPosition() - startPos;
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
Spans[] spansArr = new Spans[1];
|
||||
spansArr[0] = spans;
|
||||
getPayloads(spansArr);
|
||||
payloadCollector.reset();
|
||||
spans.collect(payloadCollector);
|
||||
processPayloads(payloadCollector.getPayloads(), startPos, spans.endPosition());
|
||||
startPos = spans.nextStartPosition();
|
||||
} while (startPos != Spans.NO_MORE_POSITIONS);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
package org.apache.lucene.search.payloads;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.spans.BufferedSpanCollector;
|
||||
import org.apache.lucene.search.spans.SpanCollector;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* SpanCollector implementation that collects payloads from a {@link Spans}
|
||||
*/
|
||||
public class PayloadSpanCollector implements SpanCollector {
|
||||
|
||||
private final Collection<byte[]> payloads = new ArrayList<>();
|
||||
BufferedPayloadCollector bufferedCollector;
|
||||
|
||||
public Collection<byte[]> getPayloads() {
|
||||
return payloads;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
payloads.clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int requiredPostings() {
|
||||
return PostingsEnum.PAYLOADS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collectLeaf(PostingsEnum postings, Term term) throws IOException {
|
||||
BytesRef payload = postings.getPayload();
|
||||
if (payload == null)
|
||||
return;
|
||||
final byte[] bytes = new byte[payload.length];
|
||||
System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length);
|
||||
payloads.add(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferedSpanCollector buffer() {
|
||||
if (bufferedCollector == null)
|
||||
bufferedCollector = new BufferedPayloadCollector();
|
||||
bufferedCollector.reset();
|
||||
return bufferedCollector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpanCollector bufferedCollector() {
|
||||
if (bufferedCollector == null)
|
||||
bufferedCollector = new BufferedPayloadCollector();
|
||||
return bufferedCollector.candidateCollector;
|
||||
}
|
||||
|
||||
class BufferedPayloadCollector implements BufferedSpanCollector {
|
||||
|
||||
final Collection<byte[]> buffer = new ArrayList<>();
|
||||
PayloadSpanCollector candidateCollector = new PayloadSpanCollector();
|
||||
|
||||
void reset() {
|
||||
buffer.clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collectCandidate(Spans spans) throws IOException {
|
||||
candidateCollector.reset();
|
||||
spans.collect(candidateCollector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void accept() {
|
||||
buffer.addAll(candidateCollector.payloads);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void replay() {
|
||||
payloads.addAll(buffer);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -17,15 +17,6 @@ package org.apache.lucene.search.payloads;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -46,6 +37,15 @@ import org.apache.lucene.search.spans.SpanQuery;
|
|||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* Experimental class to get set of payloads for most standard Lucene queries.
|
||||
* Operates like Highlighter - IndexReader should only contain doc of interest,
|
||||
|
@ -187,17 +187,16 @@ public class PayloadSpanUtil {
|
|||
for (Term term : terms) {
|
||||
termContexts.put(term, TermContext.build(context, term));
|
||||
}
|
||||
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
for (LeafReaderContext leafReaderContext : context.leaves()) {
|
||||
final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts);
|
||||
final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts, collector);
|
||||
if (spans != null) {
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
for (byte [] bytes : payload) {
|
||||
payloads.add(bytes);
|
||||
}
|
||||
}
|
||||
collector.reset();
|
||||
spans.collect(collector);
|
||||
payloads.addAll(collector.getPayloads());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,9 +17,6 @@ package org.apache.lucene.search.payloads;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -28,6 +25,8 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.search.spans.BufferedSpanCollector;
|
||||
import org.apache.lucene.search.spans.SpanCollector;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanScorer;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
|
@ -37,6 +36,9 @@ import org.apache.lucene.search.spans.TermSpans;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* This class is very similar to
|
||||
* {@link org.apache.lucene.search.spans.SpanTermQuery} except that it factors
|
||||
|
@ -67,19 +69,52 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
|
||||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new PayloadTermWeight(this, searcher);
|
||||
return new PayloadTermWeight(this, searcher, new PayloadTermCollector());
|
||||
}
|
||||
|
||||
protected class PayloadTermCollector implements SpanCollector {
|
||||
|
||||
BytesRef payload;
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
payload = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int requiredPostings() {
|
||||
return PostingsEnum.PAYLOADS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collectLeaf(PostingsEnum postings, Term term) throws IOException {
|
||||
payload = postings.getPayload();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferedSpanCollector buffer() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpanCollector bufferedCollector() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
protected class PayloadTermWeight extends SpanWeight {
|
||||
|
||||
public PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher)
|
||||
final PayloadTermCollector payloadCollector;
|
||||
|
||||
public PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher, PayloadTermCollector collector)
|
||||
throws IOException {
|
||||
super(query, searcher);
|
||||
super(query, searcher, collector);
|
||||
this.payloadCollector = collector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts);
|
||||
TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts, payloadCollector);
|
||||
return (spans == null)
|
||||
? null
|
||||
: new PayloadTermSpanScorer(spans, this, similarity.simScorer(stats, context));
|
||||
|
@ -109,29 +144,22 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
numMatches++;
|
||||
processPayload(similarity);
|
||||
payloadCollector.reset();
|
||||
spans.collect(payloadCollector);
|
||||
processPayload();
|
||||
|
||||
startPos = spans.nextStartPosition();
|
||||
} while (startPos != Spans.NO_MORE_POSITIONS);
|
||||
}
|
||||
|
||||
protected void processPayload(Similarity similarity) throws IOException {
|
||||
if (spans.isPayloadAvailable()) {
|
||||
final PostingsEnum postings = termSpans.getPostings();
|
||||
payload = postings.getPayload();
|
||||
if (payload != null) {
|
||||
payloadScore = function.currentScore(docID(), term.field(),
|
||||
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
|
||||
docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload));
|
||||
} else {
|
||||
payloadScore = function.currentScore(docID(), term.field(),
|
||||
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
|
||||
}
|
||||
payloadsSeen++;
|
||||
protected void processPayload() throws IOException {
|
||||
|
||||
float payloadFactor = payloadCollector.payload == null ? 1F :
|
||||
docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payloadCollector.payload);
|
||||
payloadScore = function.currentScore(docID(), term.field(), spans.startPosition(), spans.endPosition(),
|
||||
payloadsSeen, payloadScore, payloadFactor);
|
||||
payloadsSeen++;
|
||||
|
||||
} else {
|
||||
// zero out the payload?
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
package org.apache.lucene.search.payloads;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -16,7 +16,12 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -30,10 +35,12 @@ import java.util.Objects;
|
|||
* the given position.
|
||||
*/
|
||||
public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {
|
||||
|
||||
protected final Collection<byte[]> payloadToMatch;
|
||||
protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
|
||||
|
||||
/**
|
||||
* @param match The underlying {@link SpanQuery} to check
|
||||
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
|
||||
* @param payloadToMatch The {@link java.util.Collection} of payloads to match
|
||||
*/
|
||||
public SpanNearPayloadCheckQuery(SpanNearQuery match, Collection<byte[]> payloadToMatch) {
|
||||
|
@ -41,35 +48,41 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {
|
|||
this.payloadToMatch = Objects.requireNonNull(payloadToMatch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new SpanWeight(this, searcher, payloadCollector);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
|
||||
boolean result = spans.isPayloadAvailable();
|
||||
if (result == true) {
|
||||
Collection<byte[]> candidate = spans.getPayload();
|
||||
if (candidate.size() == payloadToMatch.size()) {
|
||||
//TODO: check the byte arrays are the same
|
||||
//hmm, can't rely on order here
|
||||
int matches = 0;
|
||||
for (byte[] candBytes : candidate) {
|
||||
//Unfortunately, we can't rely on order, so we need to compare all
|
||||
for (byte[] payBytes : payloadToMatch) {
|
||||
if (Arrays.equals(candBytes, payBytes) == true) {
|
||||
matches++;
|
||||
break;
|
||||
}
|
||||
|
||||
payloadCollector.reset();
|
||||
spans.collect(payloadCollector);
|
||||
|
||||
Collection<byte[]> candidate = payloadCollector.getPayloads();
|
||||
if (candidate.size() == payloadToMatch.size()) {
|
||||
//TODO: check the byte arrays are the same
|
||||
//hmm, can't rely on order here
|
||||
int matches = 0;
|
||||
for (byte[] candBytes : candidate) {
|
||||
//Unfortunately, we can't rely on order, so we need to compare all
|
||||
for (byte[] payBytes : payloadToMatch) {
|
||||
if (Arrays.equals(candBytes, payBytes) == true) {
|
||||
matches++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matches == payloadToMatch.size()){
|
||||
//we've verified all the bytes
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
}
|
||||
if (matches == payloadToMatch.size()){
|
||||
//we've verified all the bytes
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
return AcceptStatus.NO;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
package org.apache.lucene.search.payloads;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -16,7 +16,14 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.payloads.PayloadSpanCollector;
|
||||
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -30,11 +37,13 @@ import java.util.Iterator;
|
|||
* the given position.
|
||||
* <p>
|
||||
* Do not use this with a SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}.
|
||||
* Instead, use {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
|
||||
* Instead, use {@link org.apache.lucene.search.payloads.SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
|
||||
* aren't ordered by {@link org.apache.lucene.search.spans.SpanNearQuery}.
|
||||
*/
|
||||
public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {
|
||||
|
||||
protected final Collection<byte[]> payloadToMatch;
|
||||
protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
|
||||
|
||||
/**
|
||||
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
|
||||
|
@ -48,29 +57,35 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {
|
|||
this.payloadToMatch = payloadToMatch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new SpanWeight(this, searcher, payloadCollector);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
|
||||
boolean result = spans.isPayloadAvailable();
|
||||
if (result == true){
|
||||
Collection<byte[]> candidate = spans.getPayload();
|
||||
if (candidate.size() == payloadToMatch.size()){
|
||||
//TODO: check the byte arrays are the same
|
||||
Iterator<byte[]> toMatchIter = payloadToMatch.iterator();
|
||||
//check each of the byte arrays, in order
|
||||
//hmm, can't rely on order here
|
||||
for (byte[] candBytes : candidate) {
|
||||
//if one is a mismatch, then return false
|
||||
if (Arrays.equals(candBytes, toMatchIter.next()) == false){
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
|
||||
payloadCollector.reset();
|
||||
spans.collect(payloadCollector);
|
||||
|
||||
Collection<byte[]> candidate = payloadCollector.getPayloads();
|
||||
if (candidate.size() == payloadToMatch.size()){
|
||||
//TODO: check the byte arrays are the same
|
||||
Iterator<byte[]> toMatchIter = payloadToMatch.iterator();
|
||||
//check each of the byte arrays, in order
|
||||
//hmm, can't rely on order here
|
||||
for (byte[] candBytes : candidate) {
|
||||
//if one is a mismatch, then return false
|
||||
if (Arrays.equals(candBytes, toMatchIter.next()) == false){
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
//we've verified all the bytes
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
//we've verified all the bytes
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
return AcceptStatus.YES;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Defines span collection for eager Span implementations, such as
|
||||
* {@link org.apache.lucene.search.spans.NearSpansOrdered}
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface BufferedSpanCollector {
|
||||
|
||||
/**
|
||||
* Collect information from a possible candidate
|
||||
* @param spans the candidate Spans
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public void collectCandidate(Spans spans) throws IOException;
|
||||
|
||||
/**
|
||||
* Confirm that the last candidate Spans has been accepted by the parent algorithm
|
||||
*/
|
||||
public void accept();
|
||||
|
||||
/**
|
||||
* Replay buffered information back to the parent SpanCollector
|
||||
*/
|
||||
public void replay();
|
||||
|
||||
/**
|
||||
* A default No-op BufferedSpanCollector
|
||||
*/
|
||||
public static final BufferedSpanCollector NO_OP = new BufferedSpanCollector() {
|
||||
@Override
|
||||
public void collectCandidate(Spans spans) throws IOException {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void accept() {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void replay() {
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
}
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.search.spans;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Objects;
|
||||
|
||||
abstract class ContainSpans extends ConjunctionSpans {
|
||||
|
@ -49,12 +48,8 @@ abstract class ContainSpans extends ConjunctionSpans {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return sourceSpans.isPayloadAvailable();
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
sourceSpans.collect(collector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return sourceSpans.getPayload();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,21 +17,20 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* <p>Wrapper to allow {@link SpanQuery} objects to participate in composite
|
||||
* single-field SpanQueries by 'lying' about their search field. That is,
|
||||
|
@ -97,8 +96,8 @@ public class FieldMaskingSpanQuery extends SpanQuery {
|
|||
// ...this is done to be more consistent with things like SpanFirstQuery
|
||||
|
||||
@Override
|
||||
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
return maskedQuery.getSpans(context, acceptDocs, termContexts);
|
||||
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
return maskedQuery.getSpans(context, acceptDocs, termContexts, collector);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,12 +17,11 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* A {@link Spans} implementation wrapping another spans instance,
|
||||
* allowing span matches to be filtered easily by implementing {@link #accept}
|
||||
|
@ -110,17 +109,12 @@ public abstract class FilterSpans extends Spans {
|
|||
return atFirstInCurrentDoc ? -1
|
||||
: (startPos != NO_MORE_POSITIONS) ? in.endPosition() : NO_MORE_POSITIONS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Collection<byte[]> getPayload() throws IOException {
|
||||
return in.getPayload();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean isPayloadAvailable() throws IOException {
|
||||
return in.isPayloadAvailable();
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
in.collect(collector);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public final long cost() {
|
||||
return in.cost();
|
||||
|
|
|
@ -19,12 +19,9 @@ package org.apache.lucene.search.spans;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
|
||||
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
|
||||
* where the subspans do not overlap and have a maximum slop between them,
|
||||
* and that does not need to collect payloads.
|
||||
* To also collect payloads, see {@link NearSpansPayloadOrdered}.
|
||||
* where the subspans do not overlap and have a maximum slop between them.
|
||||
* <p>
|
||||
* The formed spans only contain minimum slop matches.<br>
|
||||
* The matching slop is computed from the distance(s) between
|
||||
|
@ -41,6 +38,9 @@ import java.util.Collection;
|
|||
* <pre>t1 t2 .. t3 </pre>
|
||||
* <pre> t1 .. t2 t3</pre>
|
||||
*
|
||||
* Because the algorithm used to minimize the size of a match consumes
|
||||
* child Spans eagerly, this uses a BufferedSpanCollector to collect
|
||||
* information from subspans.
|
||||
*
|
||||
* Expert:
|
||||
* Only public for subclassing. Most implementations should not need this class
|
||||
|
@ -51,9 +51,13 @@ public class NearSpansOrdered extends NearSpans {
|
|||
protected int matchStart = -1;
|
||||
protected int matchEnd = -1;
|
||||
|
||||
public NearSpansOrdered(SpanNearQuery query, List<Spans> subSpans) throws IOException {
|
||||
protected final SpanCollector collector;
|
||||
protected BufferedSpanCollector buffer;
|
||||
|
||||
public NearSpansOrdered(SpanNearQuery query, List<Spans> subSpans, SpanCollector collector) throws IOException {
|
||||
super(query, subSpans);
|
||||
this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1
|
||||
this.collector = collector;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -140,10 +144,15 @@ public class NearSpansOrdered extends NearSpans {
|
|||
matchStart = lastSubSpans.startPosition();
|
||||
matchEnd = lastSubSpans.endPosition();
|
||||
|
||||
buffer = collector.buffer();
|
||||
buffer.collectCandidate(subSpans[subSpans.length - 1]);
|
||||
buffer.accept();
|
||||
|
||||
int matchSlop = 0;
|
||||
int lastStart = matchStart;
|
||||
for (int i = subSpans.length - 2; i >= 0; i--) {
|
||||
Spans prevSpans = subSpans[i];
|
||||
buffer.collectCandidate(prevSpans);
|
||||
|
||||
int prevStart = prevSpans.startPosition();
|
||||
int prevEnd = prevSpans.endPosition();
|
||||
|
@ -160,8 +169,11 @@ public class NearSpansOrdered extends NearSpans {
|
|||
// prevSpans still before (lastStart, lastEnd)
|
||||
prevStart = ppStart;
|
||||
prevEnd = ppEnd;
|
||||
buffer.collectCandidate(prevSpans);
|
||||
}
|
||||
|
||||
buffer.accept();
|
||||
|
||||
assert prevStart <= matchStart;
|
||||
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
|
||||
matchSlop += (matchStart - prevEnd);
|
||||
|
@ -190,13 +202,10 @@ public class NearSpansOrdered extends NearSpans {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
return false;
|
||||
public void collect(SpanCollector collector) {
|
||||
assert collector == this.collector
|
||||
: "You must collect using the same SpanCollector as was passed to the NearSpans constructor";
|
||||
buffer.replay();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,144 +0,0 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
|
||||
/** A {@link NearSpansOrdered} that allows collecting payloads.
|
||||
* Expert:
|
||||
* Only public for subclassing. Most implementations should not need this class
|
||||
*/
|
||||
public class NearSpansPayloadOrdered extends NearSpansOrdered {
|
||||
|
||||
private List<byte[]> matchPayload;
|
||||
private Set<byte[]> possibleMatchPayloads;
|
||||
|
||||
public NearSpansPayloadOrdered(SpanNearQuery query, List<Spans> subSpans)
|
||||
throws IOException {
|
||||
super(query, subSpans);
|
||||
this.matchPayload = new LinkedList<>();
|
||||
this.possibleMatchPayloads = new HashSet<>();
|
||||
}
|
||||
|
||||
/** The subSpans are ordered in the same doc, so there is a possible match.
|
||||
* Compute the slop while making the match as short as possible by using nextStartPosition
|
||||
* on all subSpans, except the last one, in reverse order.
|
||||
* Also collect the payloads.
|
||||
*/
|
||||
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
||||
Spans lastSubSpans = subSpans[subSpans.length - 1];
|
||||
matchStart = lastSubSpans.startPosition();
|
||||
matchEnd = lastSubSpans.endPosition();
|
||||
|
||||
matchPayload.clear();
|
||||
possibleMatchPayloads.clear();
|
||||
|
||||
if (lastSubSpans.isPayloadAvailable()) {
|
||||
possibleMatchPayloads.addAll(lastSubSpans.getPayload());
|
||||
}
|
||||
|
||||
Collection<byte[]> possiblePayload = null;
|
||||
|
||||
int matchSlop = 0;
|
||||
int lastStart = matchStart;
|
||||
for (int i = subSpans.length - 2; i >= 0; i--) {
|
||||
Spans prevSpans = subSpans[i];
|
||||
|
||||
if (prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
|
||||
int prevStart = prevSpans.startPosition();
|
||||
int prevEnd = prevSpans.endPosition();
|
||||
while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
|
||||
if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
oneExhaustedInCurrentDoc = true;
|
||||
break; // Check remaining subSpans for match.
|
||||
}
|
||||
int ppStart = prevSpans.startPosition();
|
||||
int ppEnd = prevSpans.endPosition();
|
||||
if (ppEnd > lastStart) { // if overlapping spans
|
||||
break; // Check remaining subSpans.
|
||||
}
|
||||
// prevSpans still before (lastStart, lastEnd)
|
||||
prevStart = ppStart;
|
||||
prevEnd = ppEnd;
|
||||
if (prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
if (possiblePayload == null) {
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
} else {
|
||||
possiblePayload.clear();
|
||||
}
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
}
|
||||
|
||||
if (possiblePayload != null) {
|
||||
possibleMatchPayloads.addAll(possiblePayload);
|
||||
}
|
||||
|
||||
assert prevStart <= matchStart;
|
||||
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
|
||||
matchSlop += (matchStart - prevEnd);
|
||||
}
|
||||
|
||||
/* Do not break on (matchSlop > allowedSlop) here to make sure
|
||||
* that on return the first subSpans has nextStartPosition called.
|
||||
*/
|
||||
matchStart = prevStart;
|
||||
lastStart = prevStart;
|
||||
}
|
||||
|
||||
boolean match = matchSlop <= allowedSlop;
|
||||
|
||||
if (match && possibleMatchPayloads.size() > 0) {
|
||||
matchPayload.addAll(possibleMatchPayloads);
|
||||
}
|
||||
|
||||
return match; // ordered and allowed slop
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
// TODO: Would be nice to be able to lazy load payloads
|
||||
/** Return payloads when available. */
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return matchPayload;
|
||||
}
|
||||
|
||||
/** Indicates whether payloads are available */
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
return ! matchPayload.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NearSpansPayloadOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
|
||||
}
|
||||
}
|
||||
|
|
@ -22,10 +22,7 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
/**
|
||||
* Similar to {@link NearSpansOrdered}, but for the unordered case.
|
||||
|
@ -118,13 +115,8 @@ public class NearSpansUnordered extends NearSpans {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return in.getPayload();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return in.isPayloadAvailable();
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
in.collect(collector);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -249,31 +241,11 @@ public class NearSpansUnordered extends NearSpans {
|
|||
: maxEndPositionCell.endPosition();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* WARNING: The List is not necessarily in order of the positions.
|
||||
* @return Collection of <code>byte[]</code> payloads
|
||||
* @throws IOException if there is a low-level I/O error
|
||||
*/
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
Set<byte[]> matchPayload = new HashSet<>();
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
if (cell.isPayloadAvailable()) {
|
||||
matchPayload.addAll(cell.getPayload());
|
||||
}
|
||||
cell.collect(collector);
|
||||
}
|
||||
return matchPayload;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
if (cell.isPayloadAvailable()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* An interface defining the collection of postings information from the leaves
|
||||
* of a {@link org.apache.lucene.search.spans.Spans}.
|
||||
*
|
||||
* Typical use would be as follows:
|
||||
* <pre>
|
||||
* while (spans.nextStartPosition() != NO_MORE_POSITIONS) {
|
||||
* spanCollector.reset();
|
||||
* spans.collect(spanCollector);
|
||||
* doSomethingWith(spanCollector);
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface SpanCollector {
|
||||
|
||||
/**
|
||||
* Called to indicate that the driving {@link org.apache.lucene.search.spans.Spans} has
|
||||
* been moved to a new position.
|
||||
*/
|
||||
public void reset();
|
||||
|
||||
/**
|
||||
* Returns an integer indicating what postings information should be retrieved.
|
||||
*
|
||||
* See {@link org.apache.lucene.index.TermsEnum#postings(org.apache.lucene.util.Bits, org.apache.lucene.index.PostingsEnum, int)}
|
||||
*
|
||||
* @return the postings flag
|
||||
*/
|
||||
public int requiredPostings();
|
||||
|
||||
/**
|
||||
* Collect information from postings
|
||||
* @param postings a {@link PostingsEnum}
|
||||
* @param term the {@link Term} for this postings list
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public void collectLeaf(PostingsEnum postings, Term term) throws IOException;
|
||||
|
||||
/**
|
||||
* Return a {@link BufferedSpanCollector} for use by eager spans implementations, such
|
||||
* as {@link NearSpansOrdered}.
|
||||
*
|
||||
* @return a BufferedSpanCollector
|
||||
*/
|
||||
public BufferedSpanCollector buffer();
|
||||
|
||||
/**
|
||||
* @return the SpanCollector used by the {@link org.apache.lucene.search.spans.BufferedSpanCollector}
|
||||
* returned from {@link #buffer()}.
|
||||
*/
|
||||
public SpanCollector bufferedCollector();
|
||||
|
||||
/**
|
||||
* A default no-op implementation of {@link SpanCollector}.
|
||||
*/
|
||||
public static final SpanCollector NO_OP = new SpanCollector() {
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int requiredPostings() {
|
||||
return PostingsEnum.POSITIONS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collectLeaf(PostingsEnum postings, Term term) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferedSpanCollector buffer() {
|
||||
return BufferedSpanCollector.NO_OP;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpanCollector bufferedCollector() {
|
||||
return this;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
|
@ -17,19 +17,19 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
abstract class SpanContainQuery extends SpanQuery implements Cloneable {
|
||||
SpanQuery big;
|
||||
SpanQuery little;
|
||||
|
@ -55,12 +55,12 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable {
|
|||
little.extractTerms(terms);
|
||||
}
|
||||
|
||||
ArrayList<Spans> prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||
Spans bigSpans = big.getSpans(context, acceptDocs, termContexts);
|
||||
ArrayList<Spans> prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
Spans bigSpans = big.getSpans(context, acceptDocs, termContexts, collector);
|
||||
if (bigSpans == null) {
|
||||
return null;
|
||||
}
|
||||
Spans littleSpans = little.getSpans(context, acceptDocs, termContexts);
|
||||
Spans littleSpans = little.getSpans(context, acceptDocs, termContexts, collector);
|
||||
if (littleSpans == null) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -17,15 +17,15 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
/** Keep matches that contain another Spans. */
|
||||
public class SpanContainingQuery extends SpanContainQuery {
|
||||
/** Construct a SpanContainingQuery matching spans from <code>big</code>
|
||||
|
@ -54,8 +54,8 @@ public class SpanContainingQuery extends SpanContainQuery {
|
|||
* The payload is from the spans of <code>big</code>.
|
||||
*/
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||
ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts);
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts, collector);
|
||||
if (containerContained == null) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -17,22 +17,22 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TopTermsRewrite;
|
||||
import org.apache.lucene.search.ScoringRewrite;
|
||||
import org.apache.lucene.search.BooleanClause.Occur; // javadocs only
|
||||
import org.apache.lucene.search.TopTermsRewrite;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Wraps any {@link MultiTermQuery} as a {@link SpanQuery},
|
||||
* so it can be nested within other SpanQuery classes.
|
||||
|
@ -99,7 +99,7 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
|
|||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
throw new UnsupportedOperationException("Query should have been rewritten");
|
||||
}
|
||||
|
||||
|
|
|
@ -17,15 +17,8 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -33,6 +26,13 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/** Matches spans which are near one another. One can specify <i>slop</i>, the
|
||||
* maximum number of intervening unmatched positions, as well as whether
|
||||
* matches are required to be in-order.
|
||||
|
@ -118,11 +118,17 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
|
||||
Terms terms = context.reader().terms(field);
|
||||
if (terms == null) {
|
||||
return null; // field does not exist
|
||||
}
|
||||
|
||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||
SpanCollector subSpanCollector = inOrder ? collector.bufferedCollector() : collector;
|
||||
for (SpanQuery seq : clauses) {
|
||||
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
|
||||
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts, subSpanCollector);
|
||||
if (subSpan != null) {
|
||||
subSpans.add(subSpan);
|
||||
} else {
|
||||
|
@ -130,15 +136,9 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
}
|
||||
|
||||
Terms terms = context.reader().terms(field);
|
||||
if (terms == null) {
|
||||
return null; // field does not exist
|
||||
}
|
||||
|
||||
// all NearSpans require at least two subSpans
|
||||
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
|
||||
: collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans)
|
||||
: new NearSpansOrdered(this, subSpans);
|
||||
return (! inOrder) ? new NearSpansUnordered(this, subSpans) : new NearSpansOrdered(this, subSpans, collector);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -105,13 +105,13 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||
Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
Spans includeSpans = include.getSpans(context, acceptDocs, termContexts, collector);
|
||||
if (includeSpans == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
|
||||
Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts, collector);
|
||||
if (excludeSpans == null) {
|
||||
return includeSpans;
|
||||
}
|
||||
|
|
|
@ -17,26 +17,24 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.DisiPriorityQueue;
|
||||
import org.apache.lucene.search.DisiWrapper;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.DisjunctionDISIApproximation;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/** Matches the union of its clauses.
|
||||
|
@ -147,13 +145,13 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector)
|
||||
throws IOException {
|
||||
|
||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||
|
||||
for (SpanQuery sq : clauses) {
|
||||
Spans spans = sq.getSpans(context, acceptDocs, termContexts);
|
||||
Spans spans = sq.getSpans(context, acceptDocs, termContexts, collector);
|
||||
if (spans != null) {
|
||||
subSpans.add(spans);
|
||||
}
|
||||
|
@ -306,17 +304,9 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return topPositionSpans == null
|
||||
? null
|
||||
: topPositionSpans.isPayloadAvailable()
|
||||
? new ArrayList<>(topPositionSpans.getPayload())
|
||||
: null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return (topPositionSpans != null) && topPositionSpans.isPayloadAvailable();
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
if (topPositionSpans != null)
|
||||
topPositionSpans.collect(collector);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -77,8 +77,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
Spans matchSpans = match.getSpans(context, acceptDocs, termContexts);
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
Spans matchSpans = match.getSpans(context, acceptDocs, termContexts, collector);
|
||||
return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
|
||||
@Override
|
||||
protected AcceptStatus accept(Spans candidate) throws IOException {
|
||||
|
|
|
@ -17,10 +17,6 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
|
@ -29,13 +25,17 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/** Base class for span-based queries. */
|
||||
public abstract class SpanQuery extends Query {
|
||||
/** Expert: Returns the matches for this query in an index.
|
||||
* Used internally to search for spans.
|
||||
* This may return null to indicate that the SpanQuery has no results.
|
||||
*/
|
||||
public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
|
||||
public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException;
|
||||
|
||||
/**
|
||||
* Extract terms from these spans.
|
||||
|
@ -53,7 +53,7 @@ public abstract class SpanQuery extends Query {
|
|||
|
||||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new SpanWeight(this, searcher);
|
||||
return new SpanWeight(this, searcher, SpanCollector.NO_OP);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,13 +17,8 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.TermState;
|
||||
|
@ -32,6 +27,11 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/** Matches spans containing a term.
|
||||
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
|
||||
*/
|
||||
|
@ -83,7 +83,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
TermContext termContext = termContexts.get(term);
|
||||
final TermState state;
|
||||
if (termContext == null) {
|
||||
|
@ -115,7 +115,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
|
||||
termsEnum.seekExact(term.bytes(), state);
|
||||
|
||||
final PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.PAYLOADS);
|
||||
final PostingsEnum postings = termsEnum.postings(acceptDocs, null, collector.requiredPostings());
|
||||
return new TermSpans(postings, term);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,12 +17,6 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -37,6 +31,12 @@ import org.apache.lucene.search.similarities.Similarity;
|
|||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* Expert-only. Public for use by other weight implementations
|
||||
*/
|
||||
|
@ -44,12 +44,14 @@ public class SpanWeight extends Weight {
|
|||
protected final Similarity similarity;
|
||||
protected final Map<Term,TermContext> termContexts;
|
||||
protected final SpanQuery query;
|
||||
protected final SpanCollector collector;
|
||||
protected Similarity.SimWeight stats;
|
||||
|
||||
public SpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException {
|
||||
public SpanWeight(SpanQuery query, IndexSearcher searcher, SpanCollector collector) throws IOException {
|
||||
super(query);
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.query = query;
|
||||
this.collector = collector;
|
||||
|
||||
termContexts = new HashMap<>();
|
||||
TreeSet<Term> terms = new TreeSet<>();
|
||||
|
@ -97,7 +99,7 @@ public class SpanWeight extends Weight {
|
|||
if (terms != null && terms.hasPositions() == false) {
|
||||
throw new IllegalStateException("field \"" + query.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + query + ")");
|
||||
}
|
||||
Spans spans = query.getSpans(context, acceptDocs, termContexts);
|
||||
Spans spans = query.getSpans(context, acceptDocs, termContexts, collector);
|
||||
return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context));
|
||||
}
|
||||
|
||||
|
|
|
@ -17,15 +17,15 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
/** Keep matches that are contained within another Spans. */
|
||||
public class SpanWithinQuery extends SpanContainQuery {
|
||||
/** Construct a SpanWithinQuery matching spans from <code>little</code>
|
||||
|
@ -54,8 +54,8 @@ public class SpanWithinQuery extends SpanContainQuery {
|
|||
* The payload is from the spans of <code>little</code>.
|
||||
*/
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||
ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts);
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts, collector);
|
||||
if (containerContained == null) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -17,12 +17,11 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** Iterates through combinations of start/end positions per-doc.
|
||||
* Each start/end position represents a range of term positions within the current document.
|
||||
* These are enumerated in order, by increasing document number, within that by
|
||||
|
@ -51,33 +50,12 @@ public abstract class Spans extends DocIdSetIterator {
|
|||
public abstract int endPosition();
|
||||
|
||||
/**
|
||||
* Returns the payload data for the current start/end position.
|
||||
* This is only valid after {@link #nextStartPosition()}
|
||||
* returned an available start position.
|
||||
* This method must not be called more than once after each call
|
||||
* of {@link #nextStartPosition()}. However, most payloads are loaded lazily,
|
||||
* so if the payload data for the current position is not needed,
|
||||
* this method may not be called at all for performance reasons.
|
||||
* <br>
|
||||
* Note that the return type is a collection, thus the ordering should not be relied upon.
|
||||
* <br>
|
||||
* Collect data from the current Spans
|
||||
* @param collector a SpanCollector
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
|
||||
* @throws IOException if there is a low-level I/O error
|
||||
*/
|
||||
public abstract Collection<byte[]> getPayload() throws IOException;
|
||||
|
||||
/**
|
||||
* Checks if a payload can be loaded at the current start/end position.
|
||||
* <p>
|
||||
* Payloads can only be loaded once per call to
|
||||
* {@link #nextStartPosition()}.
|
||||
*
|
||||
* @return true if there is a payload available at this start/end position
|
||||
* that can be loaded
|
||||
*/
|
||||
public abstract boolean isPayloadAvailable() throws IOException;
|
||||
public abstract void collect(SpanCollector collector) throws IOException;
|
||||
|
||||
/**
|
||||
* Optional method: Return a {@link TwoPhaseIterator} view of this
|
||||
|
|
|
@ -16,14 +16,11 @@ package org.apache.lucene.search.spans;
|
|||
*/
|
||||
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Collection;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
|
@ -109,6 +106,7 @@ public class TermSpans extends Spans {
|
|||
return postings.cost();
|
||||
}
|
||||
|
||||
/*
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
final BytesRef payload = postings.getPayload();
|
||||
|
@ -127,6 +125,12 @@ public class TermSpans extends Spans {
|
|||
public boolean isPayloadAvailable() throws IOException {
|
||||
return readPayload == false && postings.getPayload() != null;
|
||||
}
|
||||
*/
|
||||
|
||||
/**
 * Forwards {@code postings} and {@code term} to the supplied collector
 * through {@code collectLeaf}.
 *
 * @param collector the collector whose {@code collectLeaf} is invoked
 * @throws IOException declared by the signature; presumably propagated from
 *         the collector call (TODO confirm against SpanCollector)
 */
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
collector.collectLeaf(postings, term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -17,34 +17,37 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockPayloadAnalyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.search.payloads.PayloadSpanCollector;
|
||||
import org.apache.lucene.search.payloads.PayloadSpanUtil;
|
||||
import org.apache.lucene.search.spans.MultiSpansWrapper;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Term position unit test.
|
||||
|
@ -53,7 +56,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
*/
|
||||
public class TestPositionIncrement extends LuceneTestCase {
|
||||
|
||||
final static boolean VERBOSE = false;
|
||||
// Debug tracing switch for this test: when true, the `if (VERBOSE)` branches print span details to System.out.
// Must stay off in committed code so normal test runs produce no console noise.
final static boolean VERBOSE = false;
|
||||
|
||||
public void testSetPosition() throws Exception {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
|
@ -238,14 +241,17 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println("\ngetPayloadSpans test");
|
||||
}
|
||||
Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector);
|
||||
while (pspans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition()
|
||||
+ " to " + pspans.endPosition());
|
||||
}
|
||||
Collection<byte[]> payloads = pspans.getPayload();
|
||||
collector.reset();
|
||||
pspans.collect(collector);
|
||||
Collection<byte[]> payloads = collector.getPayloads();
|
||||
sawZero |= pspans.startPosition() == 0;
|
||||
for (byte[] bytes : payloads) {
|
||||
count++;
|
||||
|
@ -256,7 +262,7 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
assertTrue(sawZero);
|
||||
assertEquals(5, count);
|
||||
assertEquals(8, count);
|
||||
|
||||
// System.out.println("\ngetSpans test");
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
|
@ -282,7 +288,7 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
//System.out.println(s);
|
||||
sawZero |= s.equals("pos: 0");
|
||||
}
|
||||
assertEquals(5, count);
|
||||
assertEquals(8, count);
|
||||
assertTrue(sawZero);
|
||||
writer.close();
|
||||
is.getIndexReader().close();
|
||||
|
|
|
@ -17,13 +17,6 @@ package org.apache.lucene.search.payloads;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.SimplePayloadFilter;
|
||||
|
@ -36,9 +29,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.CheckHits;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.spans.SpanNearPayloadCheckQuery;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanPayloadCheckQuery;
|
||||
import org.apache.lucene.search.spans.SpanPositionRangeQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
|
@ -50,6 +41,13 @@ import org.apache.lucene.util.TestUtil;
|
|||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/** basic test of payload-spans */
|
||||
public class TestPayloadBasics extends LuceneTestCase {
|
||||
private static IndexSearcher searcher;
|
||||
|
|
|
@ -16,32 +16,23 @@ package org.apache.lucene.search.payloads;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.payloads.PayloadHelper;
|
||||
import org.apache.lucene.search.payloads.PayloadSpanUtil;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.spans.MultiSpansWrapper;
|
||||
|
@ -55,6 +46,13 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class TestPayloadSpans extends LuceneTestCase {
|
||||
private IndexSearcher searcher;
|
||||
private Similarity similarity = new DefaultSimilarity();
|
||||
|
@ -74,14 +72,15 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq;
|
||||
Spans spans;
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
|
||||
spans = MultiSpansWrapper.wrap(indexReader, stq);
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
spans = MultiSpansWrapper.wrap(indexReader, stq, collector);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 100, 1, 1, 1);
|
||||
checkSpans(spans, collector, 100, 1, 1, 1);
|
||||
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
|
||||
spans = MultiSpansWrapper.wrap(indexReader, stq);
|
||||
spans = MultiSpansWrapper.wrap(indexReader, stq, collector);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 100, 0, 0, 0);
|
||||
checkSpans(spans, collector, 100, 0, 0, 0);
|
||||
}
|
||||
|
||||
public void testSpanFirst() throws IOException {
|
||||
|
@ -90,19 +89,20 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanFirstQuery sfq;
|
||||
match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq);
|
||||
checkSpans(spans, 109, 1, 1, 1);
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq, collector);
|
||||
checkSpans(spans, collector, 109, 1, 1, 1);
|
||||
//Test more complicated subclause
|
||||
SpanQuery[] clauses = new SpanQuery[2];
|
||||
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
|
||||
clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
|
||||
match = new SpanNearQuery(clauses, 0, true);
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, collector), collector, 100, 2, 1, 1);
|
||||
|
||||
match = new SpanNearQuery(clauses, 0, false);
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, collector), collector, 100, 2, 1, 1);
|
||||
|
||||
}
|
||||
|
||||
|
@ -124,9 +124,9 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
writer.addDocument(doc);
|
||||
IndexReader reader = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
|
||||
checkSpans(MultiSpansWrapper.wrap(reader, snq), 1,new int[]{2});
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
checkSpans(MultiSpansWrapper.wrap(reader, snq, collector), collector, 1, new int[]{2});
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
@ -135,8 +135,10 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq;
|
||||
Spans spans;
|
||||
IndexSearcher searcher = getSearcher();
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq, collector);
|
||||
assertNull(spans);
|
||||
|
||||
SpanQuery[] clauses = new SpanQuery[3];
|
||||
|
@ -145,9 +147,9 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
|
||||
SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, collector);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{3,3});
|
||||
checkSpans(spans, collector, 2, new int[]{3,3});
|
||||
|
||||
|
||||
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
|
||||
|
@ -156,10 +158,10 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
spanNearQuery = new SpanNearQuery(clauses, 6, true);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, collector);
|
||||
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 1, new int[]{3});
|
||||
checkSpans(spans, collector, 1, new int[]{3});
|
||||
|
||||
clauses = new SpanQuery[2];
|
||||
|
||||
|
@ -178,10 +180,9 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false);
|
||||
|
||||
// yy within 6 of xx within 6 of rr
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{3,3});
|
||||
checkSpans(spans, collector, 2, new int[]{3,3});
|
||||
closeIndexReader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
@ -208,12 +209,13 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
|
||||
clauses3[1] = snq;
|
||||
|
||||
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector);
|
||||
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 1, new int[]{3});
|
||||
checkSpans(spans, collector, 1, new int[]{3});
|
||||
closeIndexReader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
@ -248,9 +250,10 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{8, 8});
|
||||
checkSpans(spans, collector, 2, new int[]{8, 8});
|
||||
closeIndexReader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
@ -272,15 +275,17 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
collector.reset();
|
||||
spans.collect(collector);
|
||||
Collection<byte[]> payloads = collector.getPayloads();
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
@ -310,14 +315,17 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
collector.reset();
|
||||
spans.collect(collector);
|
||||
Collection<byte[]> payloads = collector.getPayloads();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
|
@ -348,14 +356,17 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
collector.reset();
|
||||
spans.collect(collector);
|
||||
Collection<byte[]> payloads = collector.getPayloads();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
|
@ -401,7 +412,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
directory.close();
|
||||
}
|
||||
|
||||
private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads,
|
||||
private void checkSpans(Spans spans, PayloadSpanCollector collector, int expectedNumSpans, int expectedNumPayloads,
|
||||
int expectedPayloadLength, int expectedFirstByte) throws IOException {
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
//each position match should have a span associated with it, since there is just one underlying term query, there should
|
||||
|
@ -409,16 +420,16 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
int seen = 0;
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
assertEquals("isPayloadAvailable should return true/false as payloads are expected", expectedNumPayloads > 0, spans.isPayloadAvailable());
|
||||
//See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
assertEquals("payload size", expectedNumPayloads, payload.size());
|
||||
for (final byte [] thePayload : payload) {
|
||||
assertEquals("payload length", expectedPayloadLength, thePayload.length);
|
||||
assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
|
||||
}
|
||||
collector.reset();
|
||||
spans.collect(collector);
|
||||
|
||||
Collection<byte[]> payload = collector.getPayloads();
|
||||
assertEquals("payload size", expectedNumPayloads, payload.size());
|
||||
for (final byte [] thePayload : payload) {
|
||||
assertEquals("payload length", expectedPayloadLength, thePayload.length);
|
||||
assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
|
||||
}
|
||||
|
||||
seen++;
|
||||
}
|
||||
}
|
||||
|
@ -446,26 +457,26 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
return searcher;
|
||||
}
|
||||
|
||||
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
|
||||
private void checkSpans(Spans spans, PayloadSpanCollector collector, int numSpans, int[] numPayloads) throws IOException {
|
||||
int cnt = 0;
|
||||
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
if(VERBOSE)
|
||||
System.out.println("\nSpans Dump --");
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
if(VERBOSE) {
|
||||
System.out.println("payloads for span:" + payload.size());
|
||||
for (final byte [] bytes : payload) {
|
||||
System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
|
||||
collector.reset();
|
||||
spans.collect(collector);
|
||||
|
||||
Collection<byte[]> payload = collector.getPayloads();
|
||||
if(VERBOSE) {
|
||||
System.out.println("payloads for span:" + payload.size());
|
||||
for (final byte [] bytes : payload) {
|
||||
System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
|
||||
+ new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
assertEquals("payload size", numPayloads[cnt], payload.size());
|
||||
} else { // no payload available
|
||||
assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0", numPayloads.length > 0 && numPayloads[cnt] > 0 );
|
||||
}
|
||||
assertEquals("payload size", numPayloads[cnt], payload.size());
|
||||
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,17 +17,16 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Holds all implementations of classes in the o.a.l.s.spans package as a
|
||||
* back-compatibility test. It does not run any tests per-se, however if
|
||||
|
@ -65,22 +64,17 @@ final class JustCompileSearchSpans {
|
|||
public int endPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
||||
// Compile-only stub: fail fast like the sibling stubs instead of silently collecting nothing.
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
|
@ -100,7 +94,7 @@ final class JustCompileSearchSpans {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) {
|
||||
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
@ -137,22 +131,17 @@ final class JustCompileSearchSpans {
|
|||
public int endPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
||||
// Compile-only stub: fail fast like the sibling stubs instead of silently collecting nothing.
@Override
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
|
|
|
@ -17,11 +17,6 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -30,6 +25,11 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
* A wrapper to perform span operations on a non-leaf reader context
|
||||
|
@ -40,6 +40,10 @@ import org.apache.lucene.util.Bits;
|
|||
public class MultiSpansWrapper {
|
||||
|
||||
/**
 * Convenience overload for callers that do not need to collect anything:
 * delegates to {@code wrap(reader, spanQuery, SpanCollector.NO_OP)}.
 *
 * @param reader the reader passed through to the three-argument overload
 * @param spanQuery the span query passed through to the three-argument overload
 * @return whatever the three-argument overload returns
 * @throws IOException if the three-argument overload throws it
 */
public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException {
|
||||
return wrap(reader, spanQuery, SpanCollector.NO_OP);
|
||||
}
|
||||
|
||||
public static Spans wrap(IndexReader reader, SpanQuery spanQuery, SpanCollector collector) throws IOException {
|
||||
LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing
|
||||
LeafReaderContext lrContext = lr.getContext();
|
||||
SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly
|
||||
|
@ -50,7 +54,7 @@ public class MultiSpansWrapper {
|
|||
TermContext termContext = TermContext.build(lrContext, term);
|
||||
termContexts.put(term, termContext);
|
||||
}
|
||||
Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts);
|
||||
Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts, collector);
|
||||
return actSpans;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,17 +17,6 @@ package org.apache.lucene.search.highlight;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
|
@ -59,6 +48,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
|
||||
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
|
||||
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanCollector;
|
||||
import org.apache.lucene.search.spans.SpanFirstQuery;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||
|
@ -69,6 +59,17 @@ import org.apache.lucene.search.spans.Spans;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
/**
|
||||
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
|
||||
|
@ -307,7 +308,7 @@ public class WeightedSpanTermExtractor {
|
|||
termContexts.put(term, TermContext.build(context, term));
|
||||
}
|
||||
Bits acceptDocs = context.reader().getLiveDocs();
|
||||
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
|
||||
final Spans spans = q.getSpans(context, acceptDocs, termContexts, SpanCollector.NO_OP);
|
||||
if (spans == null) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
|||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||
import org.apache.lucene.search.spans.SpanPayloadCheckQuery;
|
||||
import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
|
|
@ -17,10 +17,6 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -29,6 +25,10 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/** Wraps a span query with asserts */
|
||||
public class AssertingSpanQuery extends SpanQuery {
|
||||
private final SpanQuery in;
|
||||
|
@ -43,8 +43,8 @@ public class AssertingSpanQuery extends SpanQuery {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
Spans spans = in.getSpans(context, acceptDocs, termContexts);
|
||||
public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, SpanCollector collector) throws IOException {
|
||||
Spans spans = in.getSpans(context, acceptDocs, termContexts, collector);
|
||||
if (spans == null) {
|
||||
return null;
|
||||
} else {
|
||||
|
|
|
@ -17,12 +17,10 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Wraps a Spans with additional asserts
|
||||
|
@ -125,19 +123,13 @@ class AssertingSpans extends Spans {
|
|||
checkCurrentPositions();
|
||||
return in.endPosition();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
assert state == State.ITERATING : "getPayload() called in illegal state: " + state + ": " + in;
|
||||
return in.getPayload();
|
||||
public void collect(SpanCollector collector) throws IOException {
|
||||
assert state == State.ITERATING : "collect() called in illegal state: " + state + ": " + in;
|
||||
in.collect(collector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
assert state == State.ITERATING : "isPayloadAvailable() called in illegal state: " + state + ": " + in;
|
||||
return in.isPayloadAvailable();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
int doc = in.docID();
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.spans.SpanPayloadCheckQuery;
|
||||
import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
|
|
Loading…
Reference in New Issue