diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index fa9b72d306c..495da50b41c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -189,6 +189,12 @@ API Changes * LUCENE-6445: Two new methods in Highlighter's TokenSources; the existing methods are now marked deprecated. (David Smiley) +* LUCENE-6371: Payload collection from Spans is moved to a more generic + SpanCollector framework. Spans no longer implements .isPayloadAvailable() and + .getPayload() methods, and instead exposes a collect() method that allows + the collection of arbitrary postings information. (Alan Woodward, David + Smiley, Paul Elschot) + Other * LUCENE-6413: Test runner should report the number of suites completed/ diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index 6a22d341046..c12bf2fb2cd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -17,11 +17,6 @@ package org.apache.lucene.search.payloads; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; -import java.util.Iterator; -import java.util.Objects; - import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; @@ -29,8 +24,6 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; -import org.apache.lucene.search.spans.NearSpansOrdered; -import org.apache.lucene.search.spans.NearSpansUnordered; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanScorer; @@ -40,6 +33,11 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.Objects; + /** * This class is very similar to * {@link org.apache.lucene.search.spans.SpanNearQuery} except that it factors @@ -55,8 +53,10 @@ import org.apache.lucene.util.ToStringUtils; * @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef) */ public class PayloadNearQuery extends SpanNearQuery { + protected String fieldName; protected PayloadFunction function; + protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector(); public PayloadNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) { this(clauses, slop, inOrder, new AveragePayloadFunction()); @@ -129,17 +129,18 @@ public class PayloadNearQuery extends SpanNearQuery { } public class PayloadNearSpanWeight extends SpanWeight { + public PayloadNearSpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException { - super(query, searcher); + super(query, searcher, payloadCollector); } @Override public Scorer 
scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { - Spans spans = query.getSpans(context, acceptDocs, termContexts); + Spans spans = query.getSpans(context, acceptDocs, termContexts, payloadCollector); return (spans == null) ? null - : new PayloadNearSpanScorer(spans, this, similarity, similarity.simScorer(stats, context)); + : new PayloadNearSpanScorer(spans, this, similarity.simScorer(stats, context)); } @Override @@ -176,31 +177,11 @@ public class PayloadNearQuery extends SpanNearQuery { protected float payloadScore; private int payloadsSeen; - protected PayloadNearSpanScorer(Spans spans, SpanWeight weight, - Similarity similarity, Similarity.SimScorer docScorer) throws IOException { + protected PayloadNearSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException { super(spans, weight, docScorer); this.spans = spans; } - // Get the payloads associated with all underlying subspans - public void getPayloads(Spans[] subSpans) throws IOException { - for (int i = 0; i < subSpans.length; i++) { - if (subSpans[i] instanceof NearSpansOrdered) { - if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) { - processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(), - subSpans[i].startPosition(), subSpans[i].endPosition()); - } - getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans()); - } else if (subSpans[i] instanceof NearSpansUnordered) { - if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) { - processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(), - subSpans[i].startPosition(), subSpans[i].endPosition()); - } - getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans()); - } - } - } - // TODO change the whole spans api to use bytesRef, or nuke spans BytesRef scratch = new BytesRef(); @@ -237,9 +218,9 @@ public class PayloadNearQuery extends SpanNearQuery { do { int matchLength = spans.endPosition() - startPos; freq += docScorer.computeSlopFactor(matchLength); - Spans[] spansArr 
= new Spans[1]; - spansArr[0] = spans; - getPayloads(spansArr); + payloadCollector.reset(); + spans.collect(payloadCollector); + processPayloads(payloadCollector.getPayloads(), startPos, spans.endPosition()); startPos = spans.nextStartPosition(); } while (startPos != Spans.NO_MORE_POSITIONS); } diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java new file mode 100644 index 00000000000..722386aa56c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java @@ -0,0 +1,103 @@ +package org.apache.lucene.search.payloads; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.spans.BufferedSpanCollector; +import org.apache.lucene.search.spans.SpanCollector; +import org.apache.lucene.search.spans.Spans; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; + +/** + * SpanCollector implementation that collects payloads from a {@link Spans} + */ +public class PayloadSpanCollector implements SpanCollector { + + private final Collection payloads = new ArrayList<>(); + BufferedPayloadCollector bufferedCollector; + + public Collection getPayloads() { + return payloads; + } + + @Override + public void reset() { + payloads.clear(); + } + + @Override + public int requiredPostings() { + return PostingsEnum.PAYLOADS; + } + + @Override + public void collectLeaf(PostingsEnum postings, Term term) throws IOException { + BytesRef payload = postings.getPayload(); + if (payload == null) + return; + final byte[] bytes = new byte[payload.length]; + System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length); + payloads.add(bytes); + } + + @Override + public BufferedSpanCollector buffer() { + if (bufferedCollector == null) + bufferedCollector = new BufferedPayloadCollector(); + bufferedCollector.reset(); + return bufferedCollector; + } + + @Override + public SpanCollector bufferedCollector() { + if (bufferedCollector == null) + bufferedCollector = new BufferedPayloadCollector(); + return bufferedCollector.candidateCollector; + } + + class BufferedPayloadCollector implements BufferedSpanCollector { + + final Collection buffer = new ArrayList<>(); + PayloadSpanCollector candidateCollector = new PayloadSpanCollector(); + + void reset() { + buffer.clear(); + } + + @Override + public void collectCandidate(Spans spans) throws IOException { + candidateCollector.reset(); + spans.collect(candidateCollector); + } + + @Override + public void accept() { + 
buffer.addAll(candidateCollector.payloads); + } + + @Override + public void replay() { + payloads.addAll(buffer); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java index bfc08080f44..6d3dab55d01 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java @@ -17,15 +17,6 @@ package org.apache.lucene.search.payloads; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.TreeSet; - import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; @@ -46,6 +37,15 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.Spans; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeSet; + /** * Experimental class to get set of payloads for most standard Lucene queries. 
* Operates like Highlighter - IndexReader should only contain doc of interest, @@ -187,17 +187,16 @@ public class PayloadSpanUtil { for (Term term : terms) { termContexts.put(term, TermContext.build(context, term)); } + + PayloadSpanCollector collector = new PayloadSpanCollector(); for (LeafReaderContext leafReaderContext : context.leaves()) { - final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts); + final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts, collector); if (spans != null) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - for (byte [] bytes : payload) { - payloads.add(bytes); - } - } + collector.reset(); + spans.collect(collector); + payloads.addAll(collector.getPayloads()); } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index ec98590de5c..c29f3a74da7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -17,9 +17,6 @@ package org.apache.lucene.search.payloads; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Objects; - import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; @@ -28,6 +25,8 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.spans.BufferedSpanCollector; +import org.apache.lucene.search.spans.SpanCollector; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanScorer; import org.apache.lucene.search.spans.SpanTermQuery; @@ -37,6 +36,9 @@ import org.apache.lucene.search.spans.TermSpans; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import java.io.IOException; +import java.util.Objects; + /** * This class is very similar to * {@link org.apache.lucene.search.spans.SpanTermQuery} except that it factors @@ -67,19 +69,52 @@ public class PayloadTermQuery extends SpanTermQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new PayloadTermWeight(this, searcher); + return new PayloadTermWeight(this, searcher, new PayloadTermCollector()); + } + + protected class PayloadTermCollector implements SpanCollector { + + BytesRef payload; + + @Override + public void reset() { + payload = null; + } + + @Override + public int requiredPostings() { + return PostingsEnum.PAYLOADS; + } + + @Override + public void collectLeaf(PostingsEnum postings, Term term) throws IOException { + payload = postings.getPayload(); + } + + @Override + public BufferedSpanCollector buffer() { + throw new UnsupportedOperationException(); + } + + @Override + public SpanCollector bufferedCollector() { + throw new UnsupportedOperationException(); + } } protected class PayloadTermWeight extends SpanWeight { - public 
PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher) + final PayloadTermCollector payloadCollector; + + public PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher, PayloadTermCollector collector) throws IOException { - super(query, searcher); + super(query, searcher, collector); + this.payloadCollector = collector; } @Override public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { - TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts); + TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts, payloadCollector); return (spans == null) ? null : new PayloadTermSpanScorer(spans, this, similarity.simScorer(stats, context)); @@ -109,29 +144,22 @@ public class PayloadTermQuery extends SpanTermQuery { freq += docScorer.computeSlopFactor(matchLength); numMatches++; - processPayload(similarity); + payloadCollector.reset(); + spans.collect(payloadCollector); + processPayload(); startPos = spans.nextStartPosition(); } while (startPos != Spans.NO_MORE_POSITIONS); } - protected void processPayload(Similarity similarity) throws IOException { - if (spans.isPayloadAvailable()) { - final PostingsEnum postings = termSpans.getPostings(); - payload = postings.getPayload(); - if (payload != null) { - payloadScore = function.currentScore(docID(), term.field(), - spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, - docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload)); - } else { - payloadScore = function.currentScore(docID(), term.field(), - spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F); - } - payloadsSeen++; + protected void processPayload() throws IOException { + + float payloadFactor = payloadCollector.payload == null ? 
1F : + docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payloadCollector.payload); + payloadScore = function.currentScore(docID(), term.field(), spans.startPosition(), spans.endPosition(), + payloadsSeen, payloadScore, payloadFactor); + payloadsSeen++; - } else { - // zero out the payload? - } } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/SpanNearPayloadCheckQuery.java similarity index 66% rename from lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java rename to lucene/core/src/java/org/apache/lucene/search/payloads/SpanNearPayloadCheckQuery.java index d67c260b4f7..7488f49bb1a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/SpanNearPayloadCheckQuery.java @@ -1,4 +1,4 @@ -package org.apache.lucene.search.spans; +package org.apache.lucene.search.payloads; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,7 +16,12 @@ package org.apache.lucene.search.spans; * limitations under the License. */ +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.spans.FilterSpans.AcceptStatus; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanPositionCheckQuery; +import org.apache.lucene.search.spans.SpanWeight; +import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; @@ -30,10 +35,12 @@ import java.util.Objects; * the given position. 
*/ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery { + protected final Collection payloadToMatch; + protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector(); /** - * @param match The underlying {@link SpanQuery} to check + * @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check * @param payloadToMatch The {@link java.util.Collection} of payloads to match */ public SpanNearPayloadCheckQuery(SpanNearQuery match, Collection payloadToMatch) { @@ -41,35 +48,41 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery { this.payloadToMatch = Objects.requireNonNull(payloadToMatch); } + @Override + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return new SpanWeight(this, searcher, payloadCollector); + } + @Override protected AcceptStatus acceptPosition(Spans spans) throws IOException { - boolean result = spans.isPayloadAvailable(); - if (result == true) { - Collection candidate = spans.getPayload(); - if (candidate.size() == payloadToMatch.size()) { - //TODO: check the byte arrays are the same - //hmm, can't rely on order here - int matches = 0; - for (byte[] candBytes : candidate) { - //Unfortunately, we can't rely on order, so we need to compare all - for (byte[] payBytes : payloadToMatch) { - if (Arrays.equals(candBytes, payBytes) == true) { - matches++; - break; - } + + payloadCollector.reset(); + spans.collect(payloadCollector); + + Collection candidate = payloadCollector.getPayloads(); + if (candidate.size() == payloadToMatch.size()) { + //TODO: check the byte arrays are the same + //hmm, can't rely on order here + int matches = 0; + for (byte[] candBytes : candidate) { + //Unfortunately, we can't rely on order, so we need to compare all + for (byte[] payBytes : payloadToMatch) { + if (Arrays.equals(candBytes, payBytes) == true) { + matches++; + break; } } - if (matches == payloadToMatch.size()){ - //we've verified all 
the bytes - return AcceptStatus.YES; - } else { - return AcceptStatus.NO; - } + } + if (matches == payloadToMatch.size()){ + //we've verified all the bytes + return AcceptStatus.YES; } else { return AcceptStatus.NO; } + } else { + return AcceptStatus.NO; } - return AcceptStatus.NO; + } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java similarity index 66% rename from lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java rename to lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java index 6e1ac5d3ba6..779acba07e1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java @@ -1,4 +1,4 @@ -package org.apache.lucene.search.spans; +package org.apache.lucene.search.payloads; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,7 +16,14 @@ package org.apache.lucene.search.spans; * limitations under the License. */ +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.payloads.PayloadSpanCollector; import org.apache.lucene.search.spans.FilterSpans.AcceptStatus; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanPositionCheckQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanWeight; +import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; @@ -30,11 +37,13 @@ import java.util.Iterator; * the given position. *

* Do not use this with a SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}. - * Instead, use {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads + * Instead, use {@link org.apache.lucene.search.payloads.SpanNearPayloadCheckQuery} since it properly handles the fact that payloads * aren't ordered by {@link org.apache.lucene.search.spans.SpanNearQuery}. */ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery { + protected final Collection payloadToMatch; + protected final PayloadSpanCollector payloadCollector = new PayloadSpanCollector(); /** * @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check @@ -48,29 +57,35 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery { this.payloadToMatch = payloadToMatch; } + @Override + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return new SpanWeight(this, searcher, payloadCollector); + } + @Override protected AcceptStatus acceptPosition(Spans spans) throws IOException { - boolean result = spans.isPayloadAvailable(); - if (result == true){ - Collection candidate = spans.getPayload(); - if (candidate.size() == payloadToMatch.size()){ - //TODO: check the byte arrays are the same - Iterator toMatchIter = payloadToMatch.iterator(); - //check each of the byte arrays, in order - //hmm, can't rely on order here - for (byte[] candBytes : candidate) { - //if one is a mismatch, then return false - if (Arrays.equals(candBytes, toMatchIter.next()) == false){ - return AcceptStatus.NO; - } + + payloadCollector.reset(); + spans.collect(payloadCollector); + + Collection candidate = payloadCollector.getPayloads(); + if (candidate.size() == payloadToMatch.size()){ + //TODO: check the byte arrays are the same + Iterator toMatchIter = payloadToMatch.iterator(); + //check each of the byte arrays, in order + //hmm, can't rely on order here + for (byte[] candBytes : candidate) { + 
//if one is a mismatch, then return false + if (Arrays.equals(candBytes, toMatchIter.next()) == false){ + return AcceptStatus.NO; } - //we've verified all the bytes - return AcceptStatus.YES; - } else { - return AcceptStatus.NO; } + //we've verified all the bytes + return AcceptStatus.YES; + } else { + return AcceptStatus.NO; } - return AcceptStatus.YES; + } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/BufferedSpanCollector.java b/lucene/core/src/java/org/apache/lucene/search/spans/BufferedSpanCollector.java new file mode 100644 index 00000000000..33b583e6de6 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/spans/BufferedSpanCollector.java @@ -0,0 +1,67 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +/** + * Defines span collection for eager Span implementations, such as + * {@link org.apache.lucene.search.spans.NearSpansOrdered} + * + * @lucene.experimental + */ +public interface BufferedSpanCollector { + + /** + * Collect information from a possible candidate + * @param spans the candidate Spans + * @throws IOException on error + */ + public void collectCandidate(Spans spans) throws IOException; + + /** + * Confirm that the last candidate Spans has been accepted by the parent algorithm + */ + public void accept(); + + /** + * Replay buffered information back to the parent SpanCollector + */ + public void replay(); + + /** + * A default No-op BufferedSpanCollector + */ + public static final BufferedSpanCollector NO_OP = new BufferedSpanCollector() { + @Override + public void collectCandidate(Spans spans) throws IOException { + + } + + @Override + public void accept() { + + } + + @Override + public void replay() { + + } + }; + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java index 65c8b4703f6..45b0af7d6ab 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java @@ -19,7 +19,6 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.util.Arrays; -import java.util.Collection; import java.util.Objects; abstract class ContainSpans extends ConjunctionSpans { @@ -49,12 +48,8 @@ abstract class ContainSpans extends ConjunctionSpans { } @Override - public boolean isPayloadAvailable() throws IOException { - return sourceSpans.isPayloadAvailable(); + public void collect(SpanCollector collector) throws IOException { + sourceSpans.collect(collector); } - @Override - public Collection getPayload() throws IOException { - return sourceSpans.getPayload(); - } } diff --git 
a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 73a520e17a5..780d64badee 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -17,21 +17,20 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.Map; -import java.util.Set; -import java.util.Objects; - -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + /** *

Wrapper to allow {@link SpanQuery} objects participate in composite * single-field SpanQueries by 'lying' about their search field. That is, @@ -97,8 +96,8 @@ public class FieldMaskingSpanQuery extends SpanQuery { // ...this is done to be more consistent with things like SpanFirstQuery @Override - public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { - return maskedQuery.getSpans(context, acceptDocs, termContexts); + public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { + return maskedQuery.getSpans(context, acceptDocs, termContexts, collector); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java index af33e675355..168f3500ee3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java @@ -17,12 +17,11 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.Objects; - import org.apache.lucene.search.TwoPhaseIterator; +import java.io.IOException; +import java.util.Objects; + /** * A {@link Spans} implementation wrapping another spans instance, * allowing to filter spans matches easily by implementing {@link #accept} @@ -110,17 +109,12 @@ public abstract class FilterSpans extends Spans { return atFirstInCurrentDoc ? -1 : (startPos != NO_MORE_POSITIONS) ? 
in.endPosition() : NO_MORE_POSITIONS; } - - @Override - public final Collection getPayload() throws IOException { - return in.getPayload(); - } @Override - public final boolean isPayloadAvailable() throws IOException { - return in.isPayloadAvailable(); + public void collect(SpanCollector collector) throws IOException { + in.collect(collector); } - + @Override public final long cost() { return in.cost(); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java index 05229f2650f..541bc2c7ac8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java @@ -19,12 +19,9 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.util.List; -import java.util.Collection; /** A Spans that is formed from the ordered subspans of a SpanNearQuery - * where the subspans do not overlap and have a maximum slop between them, - * and that does not need to collect payloads. - * To also collect payloads, see {@link NearSpansPayloadOrdered}. + * where the subspans do not overlap and have a maximum slop between them. *

* The formed spans only contains minimum slop matches.
* The matching slop is computed from the distance(s) between @@ -41,6 +38,9 @@ import java.util.Collection; *

t1 t2 .. t3      
*
      t1 .. t2 t3
* + * Because the algorithm used to minimize the size of a match consumes + * child Spans eagerly, this uses a BufferedSpanCollector to collect + * information from subspans. * * Expert: * Only public for subclassing. Most implementations should not need this class @@ -51,9 +51,13 @@ public class NearSpansOrdered extends NearSpans { protected int matchStart = -1; protected int matchEnd = -1; - public NearSpansOrdered(SpanNearQuery query, List subSpans) throws IOException { + protected final SpanCollector collector; + protected BufferedSpanCollector buffer; + + public NearSpansOrdered(SpanNearQuery query, List subSpans, SpanCollector collector) throws IOException { super(query, subSpans); this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1 + this.collector = collector; } @Override @@ -140,10 +144,15 @@ public class NearSpansOrdered extends NearSpans { matchStart = lastSubSpans.startPosition(); matchEnd = lastSubSpans.endPosition(); + buffer = collector.buffer(); + buffer.collectCandidate(subSpans[subSpans.length - 1]); + buffer.accept(); + int matchSlop = 0; int lastStart = matchStart; for (int i = subSpans.length - 2; i >= 0; i--) { Spans prevSpans = subSpans[i]; + buffer.collectCandidate(prevSpans); int prevStart = prevSpans.startPosition(); int prevEnd = prevSpans.endPosition(); @@ -160,8 +169,11 @@ public class NearSpansOrdered extends NearSpans { // prevSpans still before (lastStart, lastEnd) prevStart = ppStart; prevEnd = ppEnd; + buffer.collectCandidate(prevSpans); } + buffer.accept(); + assert prevStart <= matchStart; if (matchStart > prevEnd) { // Only non overlapping spans add to slop. 
matchSlop += (matchStart - prevEnd); @@ -190,13 +202,10 @@ public class NearSpansOrdered extends NearSpans { } @Override - public Collection getPayload() throws IOException { - return null; - } - - @Override - public boolean isPayloadAvailable() { - return false; + public void collect(SpanCollector collector) { + assert collector == this.collector + : "You must collect using the same SpanCollector as was passed to the NearSpans constructor"; + buffer.replay(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java deleted file mode 100644 index 163aef80b40..00000000000 --- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java +++ /dev/null @@ -1,144 +0,0 @@ -package org.apache.lucene.search.spans; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Collection; -import java.util.Set; - -/** A {@link NearSpansOrdered} that allows collecting payloads. - * Expert: - * Only public for subclassing. 
Most implementations should not need this class - */ -public class NearSpansPayloadOrdered extends NearSpansOrdered { - - private List matchPayload; - private Set possibleMatchPayloads; - - public NearSpansPayloadOrdered(SpanNearQuery query, List subSpans) - throws IOException { - super(query, subSpans); - this.matchPayload = new LinkedList<>(); - this.possibleMatchPayloads = new HashSet<>(); - } - - /** The subSpans are ordered in the same doc, so there is a possible match. - * Compute the slop while making the match as short as possible by using nextStartPosition - * on all subSpans, except the last one, in reverse order. - * Also collect the payloads. - */ - protected boolean shrinkToAfterShortestMatch() throws IOException { - Spans lastSubSpans = subSpans[subSpans.length - 1]; - matchStart = lastSubSpans.startPosition(); - matchEnd = lastSubSpans.endPosition(); - - matchPayload.clear(); - possibleMatchPayloads.clear(); - - if (lastSubSpans.isPayloadAvailable()) { - possibleMatchPayloads.addAll(lastSubSpans.getPayload()); - } - - Collection possiblePayload = null; - - int matchSlop = 0; - int lastStart = matchStart; - for (int i = subSpans.length - 2; i >= 0; i--) { - Spans prevSpans = subSpans[i]; - - if (prevSpans.isPayloadAvailable()) { - Collection payload = prevSpans.getPayload(); - possiblePayload = new ArrayList<>(payload.size()); - possiblePayload.addAll(payload); - } - - int prevStart = prevSpans.startPosition(); - int prevEnd = prevSpans.endPosition(); - while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd) - if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) { - oneExhaustedInCurrentDoc = true; - break; // Check remaining subSpans for match. - } - int ppStart = prevSpans.startPosition(); - int ppEnd = prevSpans.endPosition(); - if (ppEnd > lastStart) { // if overlapping spans - break; // Check remaining subSpans. 
- } - // prevSpans still before (lastStart, lastEnd) - prevStart = ppStart; - prevEnd = ppEnd; - if (prevSpans.isPayloadAvailable()) { - Collection payload = prevSpans.getPayload(); - if (possiblePayload == null) { - possiblePayload = new ArrayList<>(payload.size()); - } else { - possiblePayload.clear(); - } - possiblePayload.addAll(payload); - } - } - - if (possiblePayload != null) { - possibleMatchPayloads.addAll(possiblePayload); - } - - assert prevStart <= matchStart; - if (matchStart > prevEnd) { // Only non overlapping spans add to slop. - matchSlop += (matchStart - prevEnd); - } - - /* Do not break on (matchSlop > allowedSlop) here to make sure - * that on return the first subSpans has nextStartPosition called. - */ - matchStart = prevStart; - lastStart = prevStart; - } - - boolean match = matchSlop <= allowedSlop; - - if (match && possibleMatchPayloads.size() > 0) { - matchPayload.addAll(possibleMatchPayloads); - } - - return match; // ordered and allowed slop - } - - // TODO: Remove warning after API has been finalized - // TODO: Would be nice to be able to lazy load payloads - /** Return payloads when available. */ - @Override - public Collection getPayload() throws IOException { - return matchPayload; - } - - /** Indicates whether payloads are available */ - @Override - public boolean isPayloadAvailable() { - return ! 
matchPayload.isEmpty(); - } - - @Override - public String toString() { - return "NearSpansPayloadOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition(); - } -} - diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java index bd22c30baee..4ea2ed431d2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java @@ -22,10 +22,7 @@ import org.apache.lucene.util.PriorityQueue; import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.List; -import java.util.Set; -import java.util.HashSet; /** * Similar to {@link NearSpansOrdered}, but for the unordered case. @@ -118,13 +115,8 @@ public class NearSpansUnordered extends NearSpans { } @Override - public Collection getPayload() throws IOException { - return in.getPayload(); - } - - @Override - public boolean isPayloadAvailable() throws IOException { - return in.isPayloadAvailable(); + public void collect(SpanCollector collector) throws IOException { + in.collect(collector); } @Override @@ -249,31 +241,11 @@ public class NearSpansUnordered extends NearSpans { : maxEndPositionCell.endPosition(); } - - /** - * WARNING: The List is not necessarily in order of the positions. 
- * @return Collection of byte[] payloads - * @throws IOException if there is a low-level I/O error - */ @Override - public Collection getPayload() throws IOException { - Set matchPayload = new HashSet<>(); + public void collect(SpanCollector collector) throws IOException { for (SpansCell cell : subSpanCells) { - if (cell.isPayloadAvailable()) { - matchPayload.addAll(cell.getPayload()); - } + cell.collect(collector); } - return matchPayload; - } - - @Override - public boolean isPayloadAvailable() throws IOException { - for (SpansCell cell : subSpanCells) { - if (cell.isPayloadAvailable()) { - return true; - } - } - return false; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanCollector.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanCollector.java new file mode 100644 index 00000000000..2ccca58681f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanCollector.java @@ -0,0 +1,110 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; + +import java.io.IOException; + +/** + * An interface defining the collection of postings information from the leaves + * of a {@link org.apache.lucene.search.spans.Spans} + * + * Typical use would be as follows: + *
+ *   while (spans.nextStartPosition() != NO_MORE_POSITIONS) {
+ *     spanCollector.reset();
+ *     spans.collect(spanCollector);
+ *     doSomethingWith(spanCollector);
+ *   }
+ * 
+ * + * @lucene.experimental + */ +public interface SpanCollector { + + /** + * Called to indicate that the driving {@link org.apache.lucene.search.spans.Spans} has + * been moved to a new position + */ + public void reset(); + + /** + * Returns an integer indicating what postings information should be retrieved + * + * See {@link org.apache.lucene.index.TermsEnum#postings(org.apache.lucene.util.Bits, org.apache.lucene.index.PostingsEnum, int)} + * + * @return the postings flag + */ + public int requiredPostings(); + + /** + * Collect information from postings + * @param postings a {@link PostingsEnum} + * @param term the {@link Term} for this postings list + * @throws IOException on error + */ + public void collectLeaf(PostingsEnum postings, Term term) throws IOException; + + /** + * Return a {@link BufferedSpanCollector} for use by eager spans implementations, such + * as {@link NearSpansOrdered}. + * + * @return a BufferedSpanCollector + */ + public BufferedSpanCollector buffer(); + + /** + * @return the SpanCollector used by the {@link org.apache.lucene.search.spans.BufferedSpanCollector} + * returned from {@link #buffer()}. 
+ */ + public SpanCollector bufferedCollector(); + + /** + * A default No-op implementation of SpanCollector + */ + public static final SpanCollector NO_OP = new SpanCollector() { + + @Override + public void reset() { + + } + + @Override + public int requiredPostings() { + return PostingsEnum.POSITIONS; + } + + @Override + public void collectLeaf(PostingsEnum postings, Term term) { + + } + + @Override + public BufferedSpanCollector buffer() { + return BufferedSpanCollector.NO_OP; + } + + @Override + public SpanCollector bufferedCollector() { + return this; + } + }; + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java index a85f6eb095e..43197c24c85 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java @@ -17,19 +17,19 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Map; -import java.util.Set; -import java.util.Objects; - -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + abstract class SpanContainQuery extends SpanQuery implements Cloneable { SpanQuery big; SpanQuery little; @@ -55,12 +55,12 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable { little.extractTerms(terms); } - ArrayList prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { - Spans bigSpans = big.getSpans(context, acceptDocs, termContexts); + ArrayList prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { + Spans bigSpans = big.getSpans(context, acceptDocs, termContexts, collector); if (bigSpans == null) { return null; } - Spans littleSpans = little.getSpans(context, acceptDocs, termContexts); + Spans littleSpans = little.getSpans(context, acceptDocs, termContexts, collector); if (littleSpans == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index d5cb4d1a0ad..9a0b3c1fa8f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -17,15 +17,15 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Map; -import java.util.ArrayList; - import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.TermContext; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Map; + /** Keep matches that contain another Spans. */ public class SpanContainingQuery extends SpanContainQuery { /** Construct a SpanContainingQuery matching spans from big @@ -54,8 +54,8 @@ public class SpanContainingQuery extends SpanContainQuery { * The payload is from the spans of big. */ @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { - ArrayList containerContained = prepareConjunction(context, acceptDocs, termContexts); + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { + ArrayList containerContained = prepareConjunction(context, acceptDocs, termContexts, collector); if (containerContained == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index 812b28c3aa4..44b88e4bae3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -17,22 +17,22 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Map; -import java.util.Set; -import java.util.Objects; - -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.ScoringRewrite; -import org.apache.lucene.search.BooleanClause.Occur; // javadocs only +import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + /** * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, * so it can be nested within other SpanQuery classes. @@ -99,7 +99,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue } @Override - public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { throw new UnsupportedOperationException("Query should have been rewritten"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 42238fa777f..844b2d9a9b7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -17,15 +17,8 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.List; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; - -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.Terms; @@ -33,6 +26,13 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + /** Matches spans which are near one another. One can specify slop, the * maximum number of intervening unmatched positions, as well as whether * matches are required to be in-order. @@ -118,11 +118,17 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { - ArrayList subSpans = new ArrayList<>(clauses.size()); + public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { + Terms terms = context.reader().terms(field); + if (terms == null) { + return null; // field does not exist + } + + ArrayList subSpans = new ArrayList<>(clauses.size()); + SpanCollector subSpanCollector = inOrder ? 
collector.bufferedCollector() : collector; for (SpanQuery seq : clauses) { - Spans subSpan = seq.getSpans(context, acceptDocs, termContexts); + Spans subSpan = seq.getSpans(context, acceptDocs, termContexts, subSpanCollector); if (subSpan != null) { subSpans.add(subSpan); } else { @@ -130,15 +136,9 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { } } - Terms terms = context.reader().terms(field); - if (terms == null) { - return null; // field does not exist - } // all NearSpans require at least two subSpans - return (! inOrder) ? new NearSpansUnordered(this, subSpans) - : collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans) - : new NearSpansOrdered(this, subSpans); + return (! inOrder) ? new NearSpansUnordered(this, subSpans) : new NearSpansOrdered(this, subSpans, collector); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 4a33890644b..ce1b841176b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -105,13 +105,13 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { - Spans includeSpans = include.getSpans(context, acceptDocs, termContexts); + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { + Spans includeSpans = include.getSpans(context, acceptDocs, termContexts, collector); if (includeSpans == null) { return null; } - Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts); + Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts, collector); if (excludeSpans == null) { return includeSpans; } diff 
--git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 1a0b0b5a949..d84e299e6d0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -17,26 +17,24 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; - -import java.util.List; -import java.util.Collection; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; - -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.ToStringUtils; -import org.apache.lucene.search.Query; import org.apache.lucene.search.DisiPriorityQueue; import org.apache.lucene.search.DisiWrapper; -import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.DisjunctionDISIApproximation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.ToStringUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; /** Matches the union of its clauses. 
@@ -147,13 +145,13 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { ArrayList subSpans = new ArrayList<>(clauses.size()); for (SpanQuery sq : clauses) { - Spans spans = sq.getSpans(context, acceptDocs, termContexts); + Spans spans = sq.getSpans(context, acceptDocs, termContexts, collector); if (spans != null) { subSpans.add(spans); } @@ -306,17 +304,9 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { } @Override - public Collection getPayload() throws IOException { - return topPositionSpans == null - ? null - : topPositionSpans.isPayloadAvailable() - ? new ArrayList<>(topPositionSpans.getPayload()) - : null; - } - - @Override - public boolean isPayloadAvailable() throws IOException { - return (topPositionSpans != null) && topPositionSpans.isPayloadAvailable(); + public void collect(SpanCollector collector) throws IOException { + if (topPositionSpans != null) + topPositionSpans.collect(collector); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 96d331fc3b4..57b757c91ec 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -77,8 +77,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; @Override - public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { - Spans matchSpans = match.getSpans(context, acceptDocs, termContexts); + public Spans 
getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { + Spans matchSpans = match.getSpans(context, acceptDocs, termContexts, collector); return (matchSpans == null) ? null : new FilterSpans(matchSpans) { @Override protected AcceptStatus accept(Spans candidate) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index 945372a7aef..bc40b4f967f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -17,10 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.Map; -import java.util.Set; - import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; @@ -29,13 +25,17 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Map; +import java.util.Set; + /** Base class for span-based queries. */ public abstract class SpanQuery extends Query { /** Expert: Returns the matches for this query in an index. * Used internally to search for spans. * This may return null to indicate that the SpanQuery has no results. */ - public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException; + public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException; /** * Extract terms from these spans. 
@@ -53,7 +53,7 @@ public abstract class SpanQuery extends Query { @Override public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new SpanWeight(this, searcher); + return new SpanWeight(this, searcher, SpanCollector.NO_OP); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index f13f3f97663..3ac2f2d62c6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -17,13 +17,8 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.Map; -import java.util.Set; -import java.util.Objects; - -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; @@ -32,6 +27,11 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + /** Matches spans containing a term. * This should not be used for terms that are indexed at position Integer.MAX_VALUE. 
*/ @@ -83,7 +83,7 @@ public class SpanTermQuery extends SpanQuery { } @Override - public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { TermContext termContext = termContexts.get(term); final TermState state; if (termContext == null) { @@ -115,7 +115,7 @@ public class SpanTermQuery extends SpanQuery { final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(); termsEnum.seekExact(term.bytes(), state); - final PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.PAYLOADS); + final PostingsEnum postings = termsEnum.postings(acceptDocs, null, collector.requiredPostings()); return new TermSpans(postings, term); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 260bfbde75b..4664420d99b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -17,12 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; - import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -37,6 +31,12 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + /** * Expert-only. 
Public for use by other weight implementations */ @@ -44,12 +44,14 @@ public class SpanWeight extends Weight { protected final Similarity similarity; protected final Map termContexts; protected final SpanQuery query; + protected final SpanCollector collector; protected Similarity.SimWeight stats; - public SpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException { + public SpanWeight(SpanQuery query, IndexSearcher searcher, SpanCollector collector) throws IOException { super(query); this.similarity = searcher.getSimilarity(); this.query = query; + this.collector = collector; termContexts = new HashMap<>(); TreeSet terms = new TreeSet<>(); @@ -97,7 +99,7 @@ public class SpanWeight extends Weight { if (terms != null && terms.hasPositions() == false) { throw new IllegalStateException("field \"" + query.getField() + "\" was indexed without position data; cannot run SpanQuery (query=" + query + ")"); } - Spans spans = query.getSpans(context, acceptDocs, termContexts); + Spans spans = query.getSpans(context, acceptDocs, termContexts, collector); return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context)); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index 8bdb439ded0..eb1a2af0acc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -17,15 +17,15 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Map; -import java.util.ArrayList; - import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.TermContext; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Map; + /** Keep matches that are contained within another Spans. */ public class SpanWithinQuery extends SpanContainQuery { /** Construct a SpanWithinQuery matching spans from little @@ -54,8 +54,8 @@ public class SpanWithinQuery extends SpanContainQuery { * The payload is from the spans of little. */ @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { - ArrayList containerContained = prepareConjunction(context, acceptDocs, termContexts); + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts, SpanCollector collector) throws IOException { + ArrayList containerContained = prepareConjunction(context, acceptDocs, termContexts, collector); if (containerContained == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java index 7bf112365e5..d1a822e62e7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java @@ -17,12 +17,11 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; - import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.TwoPhaseIterator; +import java.io.IOException; + /** Iterates through combinations of start/end positions per-doc. * Each start/end position represents a range of term positions within the current document. 
* These are enumerated in order, by increasing document number, within that by @@ -51,33 +50,12 @@ public abstract class Spans extends DocIdSetIterator { public abstract int endPosition(); /** - * Returns the payload data for the current start/end position. - * This is only valid after {@link #nextStartPosition()} - * returned an available start position. - * This method must not be called more than once after each call - * of {@link #nextStartPosition()}. However, most payloads are loaded lazily, - * so if the payload data for the current position is not needed, - * this method may not be called at all for performance reasons. - *
- * Note that the return type is a collection, thus the ordering should not be relied upon. - *
+ * Collect data from the current Spans + * @param collector a SpanCollector + * * @lucene.experimental - * - * @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false - * @throws IOException if there is a low-level I/O error */ - public abstract Collection getPayload() throws IOException; - - /** - * Checks if a payload can be loaded at the current start/end position. - *

- * Payloads can only be loaded once per call to - * {@link #nextStartPosition()}. - * - * @return true if there is a payload available at this start/end position - * that can be loaded - */ - public abstract boolean isPayloadAvailable() throws IOException; + public abstract void collect(SpanCollector collector) throws IOException; /** * Optional method: Return a {@link TwoPhaseIterator} view of this diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java index 5351b3d5513..43663bcb351 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java @@ -16,14 +16,11 @@ package org.apache.lucene.search.spans; */ -import org.apache.lucene.index.Term; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; import java.io.IOException; -import java.util.Collections; -import java.util.Collection; import java.util.Objects; /** @@ -109,6 +106,7 @@ public class TermSpans extends Spans { return postings.cost(); } + /* @Override public Collection getPayload() throws IOException { final BytesRef payload = postings.getPayload(); @@ -127,6 +125,12 @@ public class TermSpans extends Spans { public boolean isPayloadAvailable() throws IOException { return readPayload == false && postings.getPayload() != null; } + */ + + @Override + public void collect(SpanCollector collector) throws IOException { + collector.collectLeaf(postings, term); + } @Override public String toString() { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index dc1b2f308cd..58b33dd5416 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ 
b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -17,34 +17,37 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.io.StringReader; -import java.nio.charset.StandardCharsets; -import java.util.Collection; - -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockPayloadAnalyzer; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; -import org.apache.lucene.store.Directory; +import org.apache.lucene.search.payloads.PayloadSpanCollector; import org.apache.lucene.search.payloads.PayloadSpanUtil; import org.apache.lucene.search.spans.MultiSpansWrapper; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.Spans; -import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + +import java.io.IOException; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import java.util.Collection; 
/** * Term position unit test. @@ -53,7 +56,7 @@ import org.apache.lucene.util.BytesRef; */ public class TestPositionIncrement extends LuceneTestCase { - final static boolean VERBOSE = false; + final static boolean VERBOSE = true; public void testSetPosition() throws Exception { Analyzer analyzer = new Analyzer() { @@ -238,14 +241,17 @@ public class TestPositionIncrement extends LuceneTestCase { if (VERBOSE) { System.out.println("\ngetPayloadSpans test"); } - Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); + PayloadSpanCollector collector = new PayloadSpanCollector(); + Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector); while (pspans.nextDoc() != Spans.NO_MORE_DOCS) { while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if (VERBOSE) { System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition() + " to " + pspans.endPosition()); } - Collection payloads = pspans.getPayload(); + collector.reset(); + pspans.collect(collector); + Collection payloads = collector.getPayloads(); sawZero |= pspans.startPosition() == 0; for (byte[] bytes : payloads) { count++; @@ -256,7 +262,7 @@ public class TestPositionIncrement extends LuceneTestCase { } } assertTrue(sawZero); - assertEquals(5, count); + assertEquals(8, count); // System.out.println("\ngetSpans test"); Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); @@ -282,7 +288,7 @@ public class TestPositionIncrement extends LuceneTestCase { //System.out.println(s); sawZero |= s.equals("pos: 0"); } - assertEquals(5, count); + assertEquals(8, count); assertTrue(sawZero); writer.close(); is.getIndexReader().close(); diff --git a/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadBasics.java b/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadBasics.java index a233d407eb9..b9a7fae6640 100644 --- a/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadBasics.java +++ 
b/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadBasics.java @@ -17,13 +17,6 @@ package org.apache.lucene.search.payloads; * limitations under the License. */ -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.SimplePayloadFilter; @@ -36,9 +29,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.spans.SpanNearPayloadCheckQuery; import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanPayloadCheckQuery; import org.apache.lucene.search.spans.SpanPositionRangeQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; @@ -50,6 +41,13 @@ import org.apache.lucene.util.TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + /** basic test of payload-spans */ public class TestPayloadBasics extends LuceneTestCase { private static IndexSearcher searcher; diff --git a/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java b/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java index d351680e5ef..9079febcbdf 100644 --- a/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java @@ -16,32 +16,23 @@ package org.apache.lucene.search.payloads; * limitations under the License. 
*/ -import java.io.IOException; -import java.io.StringReader; -import java.nio.charset.StandardCharsets; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.payloads.PayloadHelper; -import org.apache.lucene.search.payloads.PayloadSpanUtil; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.spans.MultiSpansWrapper; @@ -55,6 +46,13 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + public class TestPayloadSpans extends LuceneTestCase { private IndexSearcher searcher; private Similarity similarity = new DefaultSimilarity(); @@ -74,14 +72,15 @@ public class TestPayloadSpans extends LuceneTestCase { 
SpanTermQuery stq; Spans spans; stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy")); - spans = MultiSpansWrapper.wrap(indexReader, stq); + PayloadSpanCollector collector = new PayloadSpanCollector(); + spans = MultiSpansWrapper.wrap(indexReader, stq, collector); assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 100, 1, 1, 1); + checkSpans(spans, collector, 100, 1, 1, 1); stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy")); - spans = MultiSpansWrapper.wrap(indexReader, stq); + spans = MultiSpansWrapper.wrap(indexReader, stq, collector); assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 100, 0, 0, 0); + checkSpans(spans, collector, 100, 0, 0, 0); } public void testSpanFirst() throws IOException { @@ -90,19 +89,20 @@ public class TestPayloadSpans extends LuceneTestCase { SpanFirstQuery sfq; match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); sfq = new SpanFirstQuery(match, 2); - Spans spans = MultiSpansWrapper.wrap(indexReader, sfq); - checkSpans(spans, 109, 1, 1, 1); + PayloadSpanCollector collector = new PayloadSpanCollector(); + Spans spans = MultiSpansWrapper.wrap(indexReader, sfq, collector); + checkSpans(spans, collector, 109, 1, 1, 1); //Test more complicated subclause SpanQuery[] clauses = new SpanQuery[2]; clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred")); match = new SpanNearQuery(clauses, 0, true); sfq = new SpanFirstQuery(match, 2); - checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1); + checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, collector), collector, 100, 2, 1, 1); match = new SpanNearQuery(clauses, 0, false); sfq = new SpanFirstQuery(match, 2); - checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1); + checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, collector), collector, 100, 2, 1, 1); } @@ -124,9 
+124,9 @@ public class TestPayloadSpans extends LuceneTestCase { writer.addDocument(doc); IndexReader reader = writer.getReader(); writer.close(); - - checkSpans(MultiSpansWrapper.wrap(reader, snq), 1,new int[]{2}); + PayloadSpanCollector collector = new PayloadSpanCollector(); + checkSpans(MultiSpansWrapper.wrap(reader, snq, collector), collector, 1, new int[]{2}); reader.close(); directory.close(); } @@ -135,8 +135,10 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq; Spans spans; IndexSearcher searcher = getSearcher(); + PayloadSpanCollector collector = new PayloadSpanCollector(); + stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark")); - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq, collector); assertNull(spans); SpanQuery[] clauses = new SpanQuery[3]; @@ -145,9 +147,9 @@ public class TestPayloadSpans extends LuceneTestCase { clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false); - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, collector); assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 2, new int[]{3,3}); + checkSpans(spans, collector, 2, new int[]{3,3}); clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); @@ -156,10 +158,10 @@ public class TestPayloadSpans extends LuceneTestCase { spanNearQuery = new SpanNearQuery(clauses, 6, true); - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, collector); assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 1, new int[]{3}); + checkSpans(spans, collector, 1, new int[]{3}); clauses = new SpanQuery[2]; @@ -178,10 +180,9 @@ public class 
TestPayloadSpans extends LuceneTestCase { SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false); // yy within 6 of xx within 6 of rr - - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector); assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 2, new int[]{3,3}); + checkSpans(spans, collector, 2, new int[]{3,3}); closeIndexReader.close(); directory.close(); } @@ -208,12 +209,13 @@ public class TestPayloadSpans extends LuceneTestCase { clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np")); clauses3[1] = snq; - + + PayloadSpanCollector collector = new PayloadSpanCollector(); SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector); assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 1, new int[]{3}); + checkSpans(spans, collector, 1, new int[]{3}); closeIndexReader.close(); directory.close(); } @@ -248,9 +250,10 @@ public class TestPayloadSpans extends LuceneTestCase { SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery); + PayloadSpanCollector collector = new PayloadSpanCollector(); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, collector); assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 2, new int[]{8, 8}); + checkSpans(spans, collector, 2, new int[]{8, 8}); closeIndexReader.close(); directory.close(); } @@ -272,15 +275,17 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery 
snq = new SpanNearQuery(sqs, 1, true); - Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); + PayloadSpanCollector collector = new PayloadSpanCollector(); + Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet<>(); for (int i = 0; i < topDocs.scoreDocs.length; i++) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { - Collection payloads = spans.getPayload(); - + collector.reset(); + spans.collect(collector); + Collection payloads = collector.getPayloads(); for (final byte [] payload : payloads) { payloadSet.add(new String(payload, StandardCharsets.UTF_8)); } @@ -310,14 +315,17 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); - Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); + PayloadSpanCollector collector = new PayloadSpanCollector(); + Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet<>(); for (int i = 0; i < topDocs.scoreDocs.length; i++) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { - Collection payloads = spans.getPayload(); + collector.reset(); + spans.collect(collector); + Collection payloads = collector.getPayloads(); for (final byte [] payload : payloads) { payloadSet.add(new String(payload, StandardCharsets.UTF_8)); @@ -348,14 +356,17 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); - Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); + PayloadSpanCollector collector = new 
PayloadSpanCollector(); + Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, collector); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet<>(); for (int i = 0; i < topDocs.scoreDocs.length; i++) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { - Collection payloads = spans.getPayload(); + collector.reset(); + spans.collect(collector); + Collection payloads = collector.getPayloads(); for (final byte [] payload : payloads) { payloadSet.add(new String(payload, StandardCharsets.UTF_8)); @@ -401,7 +412,7 @@ public class TestPayloadSpans extends LuceneTestCase { directory.close(); } - private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads, + private void checkSpans(Spans spans, PayloadSpanCollector collector, int expectedNumSpans, int expectedNumPayloads, int expectedPayloadLength, int expectedFirstByte) throws IOException { assertTrue("spans is null and it shouldn't be", spans != null); //each position match should have a span associated with it, since there is just one underlying term query, there should @@ -409,16 +420,16 @@ public class TestPayloadSpans extends LuceneTestCase { int seen = 0; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { - assertEquals("isPayloadAvailable should return true/false as payloads are expected", expectedNumPayloads > 0, spans.isPayloadAvailable()); - //See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - assertEquals("payload size", expectedNumPayloads, payload.size()); - for (final byte [] thePayload : payload) { - assertEquals("payload length", expectedPayloadLength, thePayload.length); - assertEquals("payload first byte", expectedFirstByte, thePayload[0]); - } + collector.reset(); + spans.collect(collector); + + Collection 
payload = collector.getPayloads(); + assertEquals("payload size", expectedNumPayloads, payload.size()); + for (final byte [] thePayload : payload) { + assertEquals("payload length", expectedPayloadLength, thePayload.length); + assertEquals("payload first byte", expectedFirstByte, thePayload[0]); } + seen++; } } @@ -446,26 +457,26 @@ public class TestPayloadSpans extends LuceneTestCase { return searcher; } - private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException { + private void checkSpans(Spans spans, PayloadSpanCollector collector, int numSpans, int[] numPayloads) throws IOException { int cnt = 0; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if(VERBOSE) System.out.println("\nSpans Dump --"); - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - if(VERBOSE) { - System.out.println("payloads for span:" + payload.size()); - for (final byte [] bytes : payload) { - System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " " + collector.reset(); + spans.collect(collector); + + Collection payload = collector.getPayloads(); + if(VERBOSE) { + System.out.println("payloads for span:" + payload.size()); + for (final byte [] bytes : payload) { + System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " " + new String(bytes, StandardCharsets.UTF_8)); - } } - assertEquals("payload size", numPayloads[cnt], payload.size()); - } else { // no payload available - assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0", numPayloads.length > 0 && numPayloads[cnt] > 0 ); } + assertEquals("payload size", numPayloads[cnt], payload.size()); + cnt++; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index b3fc9e4e423..05a40d1d2bc 
100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -17,17 +17,16 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.Map; -import java.util.Set; - import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Map; +import java.util.Set; + /** * Holds all implementations of classes in the o.a.l.s.spans package as a * back-compatibility test. It does not run any tests per-se, however if @@ -65,22 +64,17 @@ final class JustCompileSearchSpans { public int endPosition() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - + + @Override + public void collect(SpanCollector collector) throws IOException { + + } + @Override public int nextStartPosition() throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - @Override - public Collection getPayload() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public boolean isPayloadAvailable() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - @Override public long cost() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); @@ -100,7 +94,7 @@ final class JustCompileSearchSpans { } @Override - public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts) { + public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -137,22 +131,17 @@ final class JustCompileSearchSpans { public int endPosition() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - + + 
@Override + public void collect(SpanCollector collector) throws IOException { + + } + @Override public int nextStartPosition() throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - @Override - public Collection getPayload() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public boolean isPayloadAvailable() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - @Override public long cost() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java b/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java index 8d2847f08f4..06ad4652d39 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java @@ -17,11 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; - import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -30,6 +25,11 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + /** * * A wrapper to perform span operations on a non-leaf reader context @@ -40,6 +40,10 @@ import org.apache.lucene.util.Bits; public class MultiSpansWrapper { public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException { + return wrap(reader, spanQuery, SpanCollector.NO_OP); + } + + public static Spans wrap(IndexReader reader, SpanQuery spanQuery, SpanCollector collector) throws IOException { LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing 
LeafReaderContext lrContext = lr.getContext(); SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly @@ -50,7 +54,7 @@ public class MultiSpansWrapper { TermContext termContext = TermContext.build(lrContext, term); termContexts.put(term, termContext); } - Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts); + Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts, collector); return actSpans; } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 3f64b138cca..13159a13a7b 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -17,17 +17,6 @@ package org.apache.lucene.search.highlight; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; - import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.BinaryDocValues; @@ -59,6 +48,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.join.ToChildBlockJoinQuery; import org.apache.lucene.search.join.ToParentBlockJoinQuery; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanCollector; import org.apache.lucene.search.spans.SpanFirstQuery; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNotQuery; @@ -69,6 +59,17 @@ import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + /** * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether @@ -307,7 +308,7 @@ public class WeightedSpanTermExtractor { termContexts.put(term, TermContext.build(context, term)); } Bits acceptDocs = context.reader().getLiveDocs(); - final Spans spans = q.getSpans(context, acceptDocs, termContexts); + final Spans spans = q.getSpans(context, acceptDocs, termContexts, SpanCollector.NO_OP); if (spans == null) { return; } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index 0bc1e20a1e3..686617e3bb2 100644 --- 
a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -85,7 +85,7 @@ import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNotQuery; import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanPayloadCheckQuery; +import org.apache.lucene.search.payloads.SpanPayloadCheckQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java index 618b51912f0..7a2697cb4cf 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java @@ -17,10 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Map; -import java.util.Set; - import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -29,6 +25,10 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Map; +import java.util.Set; + /** Wraps a span query with asserts */ public class AssertingSpanQuery extends SpanQuery { private final SpanQuery in; @@ -43,8 +43,8 @@ public class AssertingSpanQuery extends SpanQuery { } @Override - public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { - Spans spans = in.getSpans(context, acceptDocs, termContexts); + public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts, SpanCollector collector) throws IOException { + Spans spans = in.getSpans(context, acceptDocs, termContexts, collector); if (spans == null) { return null; } else { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java index 59c69b1218f..6958f1b55fa 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java @@ -17,12 +17,10 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; - import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.TwoPhaseIterator; -import org.apache.lucene.search.spans.Spans; + +import java.io.IOException; /** * Wraps a Spans with additional asserts @@ -125,19 +123,13 @@ class AssertingSpans extends Spans { checkCurrentPositions(); return in.endPosition(); } - + @Override - public Collection getPayload() throws IOException { - assert state == State.ITERATING : "getPayload() called in illegal state: " + state + ": " + in; - return in.getPayload(); + public void collect(SpanCollector collector) throws IOException { + assert state == State.ITERATING : "collect() called in illegal state: " + state + ": " + in; + in.collect(collector); } - - @Override - public boolean isPayloadAvailable() throws IOException { - assert state == State.ITERATING : "isPayloadAvailable() called in illegal state: " + state + ": " + in; - return in.isPayloadAvailable(); - } - + @Override public int docID() { int doc = in.docID(); diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java index f9e752e4ba3..f2d08707ac1 100644 --- a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java +++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; -import org.apache.lucene.search.spans.SpanPayloadCheckQuery; +import org.apache.lucene.search.payloads.SpanPayloadCheckQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.HighlightParams;