diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 663678d045b..a9805a96fd7 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -32,6 +32,26 @@ API Changes
* LUCENE-6067: Accountable.getChildResources has a default
implementation returning the empty list. (Robert Muir)
+======================= Lucene 5.2.0 =======================
+
+New Features
+
+* LUCENE-6308: Span queries now share document conjunction/intersection
+ code with boolean queries, and use two-phased iterators for
+ faster intersection by avoiding loading positions in certain cases.
+ (Paul Elschot, Robert Muir via Mike McCandless)
+
+Optimizations
+
+* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
+ one of the queries is MatchAllDocsQuery and just invokes the much
+ faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
+ Grand, Mike McCandless)
+
+Bug Fixes
+
+* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.
+ (Varun Thacker, Adrien Grand, Mike McCandless)
======================= Lucene 5.1.0 =======================
New Features
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 2893eea53ca..a64be82aef2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -32,8 +32,8 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
-import java.util.Map;
import java.util.Map.Entry;
+import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@@ -47,6 +47,7 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
import org.apache.lucene.index.FieldInfos.FieldNumbers;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
@@ -1315,6 +1316,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
*/
public void deleteDocuments(Query... queries) throws IOException {
ensureOpen();
+
+ // LUCENE-6379: Specialize MatchAllDocsQuery
+ for(Query query : queries) {
+ if (query.getClass() == MatchAllDocsQuery.class) {
+ deleteAll();
+ return;
+ }
+ }
+
try {
if (docWriter.deleteQueries(queries)) {
processEvents(true, false);
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
index 987abf955c3..53342b57870 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@@ -23,8 +23,14 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.search.spans.Spans;
-class ConjunctionDISI extends DocIdSetIterator {
+/** A conjunction of DocIdSetIterators.
+ * This iterates over the doc ids that are present in each given DocIdSetIterator.
+ * Public only for use in {@link org.apache.lucene.search.spans}.
+ * @lucene.internal
+ */
+public class ConjunctionDISI extends DocIdSetIterator {
/** Create a conjunction over the provided iterators, taking advantage of
* {@link TwoPhaseIterator}. */
@@ -32,18 +38,16 @@ class ConjunctionDISI extends DocIdSetIterator {
final List allIterators = new ArrayList<>();
final List twoPhaseIterators = new ArrayList<>();
for (DocIdSetIterator iterator : iterators) {
- if (iterator instanceof Scorer) {
- // if we have a scorer, check if it supports two-phase iteration
- TwoPhaseIterator twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
- if (twoPhaseIterator != null) {
- // Note:
- allIterators.add(twoPhaseIterator.approximation());
- twoPhaseIterators.add(twoPhaseIterator);
- } else {
- allIterators.add(iterator);
- }
- } else {
- // no approximation support, use the iterator as-is
+ TwoPhaseIterator twoPhaseIterator = null;
+ if (iterator instanceof Scorer) {
+ twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
+ } else if (iterator instanceof Spans) {
+ twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
+ }
+ if (twoPhaseIterator != null) {
+ allIterators.add(twoPhaseIterator.approximation());
+ twoPhaseIterators.add(twoPhaseIterator);
+ } else { // no approximation support, use the iterator as-is
allIterators.add(iterator);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
index 4089bc050e8..f13667ad401 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
@@ -30,7 +30,7 @@ import org.apache.lucene.util.ToStringUtils;
* A query that matches all documents.
*
*/
-public class MatchAllDocsQuery extends Query {
+public final class MatchAllDocsQuery extends Query {
private class MatchAllScorer extends Scorer {
final float score;
@@ -88,7 +88,7 @@ public class MatchAllDocsQuery extends Query {
private float queryWeight;
private float queryNorm;
- public MatchAllDocsWeight(IndexSearcher searcher) {
+ public MatchAllDocsWeight() {
super(MatchAllDocsQuery.this);
}
@@ -130,7 +130,7 @@ public class MatchAllDocsQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
- return new MatchAllDocsWeight(searcher);
+ return new MatchAllDocsWeight();
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
index e46bb45e85e..c7007e17712 100644
--- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
@@ -26,7 +26,6 @@ import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
@@ -71,7 +70,7 @@ public class PayloadNearQuery extends SpanNearQuery {
}
@Override
- public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new PayloadNearSpanWeight(this, searcher);
}
@@ -113,7 +112,7 @@ public class PayloadNearQuery extends SpanNearQuery {
@Override
public int hashCode() {
final int prime = 31;
- int result = super.hashCode();
+ int result = super.hashCode() ^ getClass().hashCode();
result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode());
result = prime * result + ((function == null) ? 0 : function.hashCode());
return result;
@@ -149,8 +148,10 @@ public class PayloadNearQuery extends SpanNearQuery {
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
- return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
- similarity, similarity.simScorer(stats, context));
+ Spans spans = query.getSpans(context, acceptDocs, termContexts);
+ return (spans == null)
+ ? null
+ : new PayloadNearSpanScorer(spans, this, similarity, similarity.simScorer(stats, context));
}
@Override
@@ -188,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery {
protected float payloadScore;
private int payloadsSeen;
- protected PayloadNearSpanScorer(Spans spans, Weight weight,
+ protected PayloadNearSpanScorer(Spans spans, SpanWeight weight,
Similarity similarity, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
this.spans = spans;
@@ -200,13 +201,13 @@ public class PayloadNearQuery extends SpanNearQuery {
if (subSpans[i] instanceof NearSpansOrdered) {
if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(),
- subSpans[i].start(), subSpans[i].end());
+ subSpans[i].startPosition(), subSpans[i].endPosition());
}
getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
} else if (subSpans[i] instanceof NearSpansUnordered) {
if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(),
- subSpans[i].start(), subSpans[i].end());
+ subSpans[i].startPosition(), subSpans[i].endPosition());
}
getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
}
@@ -233,7 +234,7 @@ public class PayloadNearQuery extends SpanNearQuery {
scratch.length = thePayload.length;
payloadScore = function.currentScore(doc, fieldName, start, end,
payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
- spans.start(), spans.end(), scratch));
+ spans.startPosition(), spans.endPosition(), scratch));
++payloadsSeen;
}
}
@@ -241,22 +242,20 @@ public class PayloadNearQuery extends SpanNearQuery {
//
@Override
protected boolean setFreqCurrentDoc() throws IOException {
- if (!more) {
- return false;
- }
- doc = spans.doc();
- freq = 0.0f;
- payloadScore = 0;
- payloadsSeen = 0;
- do {
- int matchLength = spans.end() - spans.start();
- freq += docScorer.computeSlopFactor(matchLength);
- Spans[] spansArr = new Spans[1];
- spansArr[0] = spans;
- getPayloads(spansArr);
- more = spans.next();
- } while (more && (doc == spans.doc()));
- return true;
+ freq = 0.0f;
+ payloadScore = 0;
+ payloadsSeen = 0;
+ int startPos = spans.nextStartPosition();
+ assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
+ do {
+ int matchLength = spans.endPosition() - startPos;
+ freq += docScorer.computeSlopFactor(matchLength);
+ Spans[] spansArr = new Spans[1];
+ spansArr[0] = spans;
+ getPayloads(spansArr);
+ startPos = spans.nextStartPosition();
+ } while (startPos != Spans.NO_MORE_POSITIONS);
+ return true;
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
index 0329acce127..1596b35280f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
@@ -169,7 +169,7 @@ public class PayloadSpanUtil {
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps,
- inorder);
+ inorder);
sp.setBoost(query.getBoost());
getPayloads(payloads, sp);
}
@@ -186,11 +186,15 @@ public class PayloadSpanUtil {
}
for (LeafReaderContext leafReaderContext : context.leaves()) {
final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts);
- while (spans.next() == true) {
- if (spans.isPayloadAvailable()) {
- Collection payload = spans.getPayload();
- for (byte [] bytes : payload) {
- payloads.add(bytes);
+ if (spans != null) {
+ while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
+ while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
+ if (spans.isPayloadAvailable()) {
+ Collection payload = spans.getPayload();
+ for (byte [] bytes : payload) {
+ payloads.add(bytes);
+ }
+ }
}
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
index 463a6a0f806..977ed262c46 100644
--- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads;
*/
import java.io.IOException;
+import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
@@ -26,10 +27,10 @@ import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
@@ -60,14 +61,14 @@ public class PayloadTermQuery extends SpanTermQuery {
}
public PayloadTermQuery(Term term, PayloadFunction function,
- boolean includeSpanScore) {
+ boolean includeSpanScore) {
super(term);
- this.function = function;
+ this.function = Objects.requireNonNull(function);
this.includeSpanScore = includeSpanScore;
}
@Override
- public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new PayloadTermWeight(this, searcher);
}
@@ -79,9 +80,11 @@ public class PayloadTermQuery extends SpanTermQuery {
}
@Override
- public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
- return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
- this, similarity.simScorer(stats, context));
+ public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts);
+ return (spans == null)
+ ? null
+ : new PayloadTermSpanScorer(spans, this, similarity.simScorer(stats, context));
}
protected class PayloadTermSpanScorer extends SpanScorer {
@@ -90,45 +93,42 @@ public class PayloadTermQuery extends SpanTermQuery {
protected int payloadsSeen;
private final TermSpans termSpans;
- public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
+ public PayloadTermSpanScorer(TermSpans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
- termSpans = spans;
+ termSpans = spans; // CHECKME: generics to use SpanScorer.spans as TermSpans.
}
@Override
protected boolean setFreqCurrentDoc() throws IOException {
- if (!more) {
- return false;
- }
- doc = spans.doc();
freq = 0.0f;
numMatches = 0;
payloadScore = 0;
payloadsSeen = 0;
- while (more && doc == spans.doc()) {
- int matchLength = spans.end() - spans.start();
+ int startPos = spans.nextStartPosition();
+ assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
+ do {
+ int matchLength = spans.endPosition() - startPos;
freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
processPayload(similarity);
- more = spans.next();// this moves positions to the next match in this
- // document
- }
- return more || (freq != 0);
+ startPos = spans.nextStartPosition();
+ } while (startPos != Spans.NO_MORE_POSITIONS);
+ return freq != 0;
}
protected void processPayload(Similarity similarity) throws IOException {
- if (termSpans.isPayloadAvailable()) {
+ if (spans.isPayloadAvailable()) {
final PostingsEnum postings = termSpans.getPostings();
payload = postings.getPayload();
if (payload != null) {
payloadScore = function.currentScore(doc, term.field(),
- spans.start(), spans.end(), payloadsSeen, payloadScore,
- docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
+ spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
+ docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
} else {
payloadScore = function.currentScore(doc, term.field(),
- spans.start(), spans.end(), payloadsSeen, payloadScore, 1F);
+ spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
}
payloadsSeen++;
@@ -176,7 +176,7 @@ public class PayloadTermQuery extends SpanTermQuery {
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
- PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs());
+ PayloadTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
@@ -220,7 +220,7 @@ public class PayloadTermQuery extends SpanTermQuery {
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
- result = prime * result + ((function == null) ? 0 : function.hashCode());
+ result = prime * result + function.hashCode();
result = prime * result + (includeSpanScore ? 1231 : 1237);
return result;
}
@@ -234,14 +234,9 @@ public class PayloadTermQuery extends SpanTermQuery {
if (getClass() != obj.getClass())
return false;
PayloadTermQuery other = (PayloadTermQuery) obj;
- if (function == null) {
- if (other.function != null)
- return false;
- } else if (!function.equals(other.function))
- return false;
if (includeSpanScore != other.includeSpanScore)
return false;
- return true;
+ return function.equals(other.function);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
index 9b740f6526b..465d3796cbe 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
@@ -106,7 +106,7 @@ public class FieldMaskingSpanQuery extends SpanQuery {
}
@Override
- public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return maskedQuery.createWeight(searcher, needsScores);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
index d26965100f1..d94a1218e3d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
@@ -19,10 +19,13 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Collection;
+import java.util.Objects;
+
+import org.apache.lucene.search.TwoPhaseIterator;
/**
- * A {@link Spans} implementation which allows wrapping another spans instance
- * and override some selected methods.
+ * A {@link Spans} implementation wrapping another spans instance,
+ * allowing to override selected methods in a subclass.
*/
public class FilterSpans extends Spans {
@@ -31,32 +34,37 @@ public class FilterSpans extends Spans {
/** Wrap the given {@link Spans}. */
public FilterSpans(Spans in) {
- this.in = in;
+ this.in = Objects.requireNonNull(in);
}
@Override
- public boolean next() throws IOException {
- return in.next();
+ public int nextDoc() throws IOException {
+ return in.nextDoc();
}
@Override
- public boolean skipTo(int target) throws IOException {
- return in.skipTo(target);
+ public int advance(int target) throws IOException {
+ return in.advance(target);
}
@Override
- public int doc() {
- return in.doc();
+ public int docID() {
+ return in.docID();
}
@Override
- public int start() {
- return in.start();
+ public int nextStartPosition() throws IOException {
+ return in.nextStartPosition();
}
@Override
- public int end() {
- return in.end();
+ public int startPosition() {
+ return in.startPosition();
+ }
+
+ @Override
+ public int endPosition() {
+ return in.endPosition();
}
@Override
@@ -79,4 +87,8 @@ public class FilterSpans extends Spans {
return "Filter(" + in.toString() + ")";
}
+ @Override
+ public TwoPhaseIterator asTwoPhaseIterator() {
+ return in.asTwoPhaseIterator();
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java
new file mode 100644
index 00000000000..e2251731992
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java
@@ -0,0 +1,103 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.ConjunctionDISI;
+import org.apache.lucene.search.TwoPhaseIterator;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Common super class for un/ordered Spans
+ */
+abstract class NearSpans extends Spans {
+  SpanNearQuery query;
+  int allowedSlop;
+
+  List<Spans> subSpans; // in query order
+  DocIdSetIterator conjunction; // use to move to next doc with all clauses
+  boolean atFirstInCurrentDoc;
+  boolean oneExhaustedInCurrentDoc; // no more results possible in current doc
+
+  /** Takes the query (for its slop) and at least two sub spans, in query order. */
+  NearSpans(SpanNearQuery query, List<Spans> subSpans) throws IOException {
+    this.query = Objects.requireNonNull(query, "query");
+    this.allowedSlop = query.getSlop();
+    // Null-check first so that a null list fails here, before size() dereferences it.
+    this.subSpans = Objects.requireNonNull(subSpans, "subSpans"); // in query order
+    if (subSpans.size() < 2) {
+      throw new IllegalArgumentException("Less than 2 subSpans: " + query);
+    }
+    this.conjunction = ConjunctionDISI.intersect(subSpans);
+  }
+
+  @Override
+  public int docID() {
+    return conjunction.docID();
+  }
+
+  @Override
+  public long cost() {
+    return conjunction.cost();
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    return (conjunction.nextDoc() == NO_MORE_DOCS)
+      ? NO_MORE_DOCS
+      : toMatchDoc();
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    return (conjunction.advance(target) == NO_MORE_DOCS)
+      ? NO_MORE_DOCS
+      : toMatchDoc();
+  }
+
+  /** Called with the conjunction on a doc; its result is what nextDoc/advance return. */
+  abstract int toMatchDoc() throws IOException;
+
+  /** Backs {@link TwoPhaseIterator#matches()} of the view from {@link #asTwoPhaseIterator()}. */
+  abstract boolean twoPhaseCurrentDocMatches() throws IOException;
+
+  /** Return a {@link TwoPhaseIterator} view of this {@link NearSpans}. */
+  @Override
+  public TwoPhaseIterator asTwoPhaseIterator() {
+    return new TwoPhaseIterator(conjunction) {
+      @Override
+      public boolean matches() throws IOException {
+        return twoPhaseCurrentDocMatches();
+      }
+    };
+  }
+
+  private Spans[] subSpansArray = null; // init only when needed.
+
+  /** Returns the sub spans as an array that is built on the first call and then reused. */
+  public Spans[] getSubSpans() {
+    if (subSpansArray == null) {
+      subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
+    }
+    return subSpansArray;
+  }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
index 508c9661ed2..a77651e8e62 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
@@ -17,24 +17,18 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.InPlaceMergeSorter;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
-import java.util.Map;
import java.util.Set;
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
- * where the subspans do not overlap and have a maximum slop between them.
+ * where the subspans do not overlap and have a maximum slop between them,
+ * and that does not need to collect payloads.
+ * To also collect payloads, see {@link NearSpansPayloadOrdered}.
*
* The formed spans only contains minimum slop matches.
* The matching slop is computed from the distance(s) between
@@ -55,306 +49,196 @@ import java.util.Set;
* Expert:
* Only public for subclassing. Most implementations should not need this class
*/
-public class NearSpansOrdered extends Spans {
- private final int allowedSlop;
- private boolean firstTime = true;
- private boolean more = false;
+public class NearSpansOrdered extends NearSpans {
- /** The spans in the same order as the SpanNearQuery */
- private final Spans[] subSpans;
+ protected int matchDoc = -1;
+ protected int matchStart = -1;
+ protected int matchEnd = -1;
- /** Indicates that all subSpans have same doc() */
- private boolean inSameDoc = false;
-
- private int matchDoc = -1;
- private int matchStart = -1;
- private int matchEnd = -1;
- private List matchPayload;
-
- private final Spans[] subSpansByDoc;
- // Even though the array is probably almost sorted, InPlaceMergeSorter will likely
- // perform better since it has a lower overhead than TimSorter for small arrays
- private final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
- @Override
- protected void swap(int i, int j) {
- ArrayUtil.swap(subSpansByDoc, i, j);
- }
- @Override
- protected int compare(int i, int j) {
- return subSpansByDoc[i].doc() - subSpansByDoc[j].doc();
- }
- };
-
- private SpanNearQuery query;
- private boolean collectPayloads = true;
-
- public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException {
- this(spanNearQuery, context, acceptDocs, termContexts, true);
+ public NearSpansOrdered(SpanNearQuery query, List subSpans) throws IOException {
+ super(query, subSpans);
+ this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1
}
- public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map termContexts, boolean collectPayloads)
- throws IOException {
- if (spanNearQuery.getClauses().length < 2) {
- throw new IllegalArgumentException("Less than 2 clauses: "
- + spanNearQuery);
- }
- this.collectPayloads = collectPayloads;
- allowedSlop = spanNearQuery.getSlop();
- SpanQuery[] clauses = spanNearQuery.getClauses();
- subSpans = new Spans[clauses.length];
- matchPayload = new LinkedList<>();
- subSpansByDoc = new Spans[clauses.length];
- for (int i = 0; i < clauses.length; i++) {
- subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
- subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
- }
- query = spanNearQuery; // kept for toString() only.
- }
-
- // inherit javadocs
- @Override
- public int doc() { return matchDoc; }
-
- // inherit javadocs
- @Override
- public int start() { return matchStart; }
-
- // inherit javadocs
- @Override
- public int end() { return matchEnd; }
-
- public Spans[] getSubSpans() {
- return subSpans;
- }
-
- // TODO: Remove warning after API has been finalized
- // TODO: Would be nice to be able to lazy load payloads
- @Override
- public Collection getPayload() throws IOException {
- return matchPayload;
- }
-
- // TODO: Remove warning after API has been finalized
- @Override
- public boolean isPayloadAvailable() {
- return matchPayload.isEmpty() == false;
- }
-
- @Override
- public long cost() {
- long minCost = Long.MAX_VALUE;
- for (int i = 0; i < subSpans.length; i++) {
- minCost = Math.min(minCost, subSpans[i].cost());
- }
- return minCost;
- }
-
- // inherit javadocs
- @Override
- public boolean next() throws IOException {
- if (firstTime) {
- firstTime = false;
- for (int i = 0; i < subSpans.length; i++) {
- if (! subSpans[i].next()) {
- more = false;
- return false;
- }
- }
- more = true;
- }
- if(collectPayloads) {
- matchPayload.clear();
- }
- return advanceAfterOrdered();
- }
-
- // inherit javadocs
- @Override
- public boolean skipTo(int target) throws IOException {
- if (firstTime) {
- firstTime = false;
- for (int i = 0; i < subSpans.length; i++) {
- if (! subSpans[i].skipTo(target)) {
- more = false;
- return false;
- }
- }
- more = true;
- } else if (more && (subSpans[0].doc() < target)) {
- if (subSpans[0].skipTo(target)) {
- inSameDoc = false;
- } else {
- more = false;
- return false;
- }
- }
- if(collectPayloads) {
- matchPayload.clear();
- }
- return advanceAfterOrdered();
- }
-
/** Advances the subSpans to just after an ordered match with a minimum slop
* that is smaller than the slop allowed by the SpanNearQuery.
* @return true iff there is such a match.
*/
- private boolean advanceAfterOrdered() throws IOException {
- while (more && (inSameDoc || toSameDoc())) {
- if (stretchToOrder() && shrinkToAfterShortestMatch()) {
- return true;
- }
- }
- return false; // no more matches
- }
-
-
- /** Advance the subSpans to the same document */
- private boolean toSameDoc() throws IOException {
- sorter.sort(0, subSpansByDoc.length);
- int firstIndex = 0;
- int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
- while (subSpansByDoc[firstIndex].doc() != maxDoc) {
- if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
- more = false;
- inSameDoc = false;
- return false;
- }
- maxDoc = subSpansByDoc[firstIndex].doc();
- if (++firstIndex == subSpansByDoc.length) {
- firstIndex = 0;
- }
- }
- for (int i = 0; i < subSpansByDoc.length; i++) {
- assert (subSpansByDoc[i].doc() == maxDoc)
- : " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
- + "\n at doc " + subSpansByDoc[i].doc()
- + ", but should be at " + maxDoc;
- }
- inSameDoc = true;
- return true;
- }
-
- /** Check whether two Spans in the same document are ordered and not overlapping.
- * @return false iff spans2's start position is smaller than spans1's end position
- */
- static final boolean docSpansOrderedNonOverlap(Spans spans1, Spans spans2) {
- assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
- assert spans1.start() < spans1.end();
- assert spans2.start() < spans2.end();
- return spans1.end() <= spans2.start();
- }
-
- /** Like {@link #docSpansOrderedNonOverlap(Spans,Spans)}, but use the spans
- * starts and ends as parameters.
- */
- private static final boolean docSpansOrderedNonOverlap(int start1, int end1, int start2, int end2) {
- assert start1 < end1;
- assert start2 < end2;
- return end1 <= start2;
- }
-
- /** Order the subSpans within the same document by advancing all later spans
- * after the previous one.
- */
- private boolean stretchToOrder() throws IOException {
- matchDoc = subSpans[0].doc();
- for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
- while (! docSpansOrderedNonOverlap(subSpans[i-1], subSpans[i])) {
- if (! subSpans[i].next()) {
- inSameDoc = false;
- more = false;
- break;
- } else if (matchDoc != subSpans[i].doc()) {
- inSameDoc = false;
- break;
+ @Override
+ int toMatchDoc() throws IOException {
+ subSpansToFirstStartPosition();
+ while (true) {
+ if (! stretchToOrder()) {
+ if (conjunction.nextDoc() == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ subSpansToFirstStartPosition();
+ } else {
+ if (shrinkToAfterShortestMatch()) {
+ atFirstInCurrentDoc = true;
+ return conjunction.docID();
+ }
+ // not a match, after shortest ordered spans, not at beginning of doc.
+ if (oneExhaustedInCurrentDoc) {
+ if (conjunction.nextDoc() == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ subSpansToFirstStartPosition();
}
}
}
- return inSameDoc;
+ }
+
+  @Override
+  boolean twoPhaseCurrentDocMatches() throws IOException {
+    subSpansToFirstStartPosition();
+    // Alternate ordering and shrinking until shrinkToAfterShortestMatch() accepts
+    // a match, or until the sub spans can no longer be ordered in this doc.
+    boolean ordered = stretchToOrder();
+    while (ordered) {
+      if (shrinkToAfterShortestMatch()) {
+        atFirstInCurrentDoc = true; // nextStartPosition() hands out this match first
+        return true;
+      }
+      // not a match, after shortest ordered spans; short-circuit keeps
+      // stretchToOrder() from running once a sub spans is exhausted.
+      ordered = ! oneExhaustedInCurrentDoc && stretchToOrder();
+    }
+    return false;
+  }
+
+  @Override
+  public int nextStartPosition() throws IOException {
+    // The first match of a doc was already found while positioning on the doc;
+    // hand it out once without moving any sub spans.
+    if (atFirstInCurrentDoc) {
+      atFirstInCurrentDoc = false;
+      return matchStart;
+    }
+    for (;;) {
+      // Short-circuit: stretchToOrder() only runs while no sub spans is exhausted.
+      final boolean noMoreMatches = oneExhaustedInCurrentDoc || ! stretchToOrder();
+      if (noMoreMatches) {
+        matchStart = NO_MORE_POSITIONS;
+        matchEnd = NO_MORE_POSITIONS;
+        return NO_MORE_POSITIONS;
+      }
+      // A successful shrink may also leave oneExhaustedInCurrentDoc set; the flag
+      // is then seen on the next call.
+      if (shrinkToAfterShortestMatch()) {
+        return matchStart;
+      }
+    }
+  }
+
+ private void subSpansToFirstStartPosition() throws IOException {
+ for (Spans spans : subSpans) {
+ assert spans.startPosition() == -1 : "spans="+spans;
+ spans.nextStartPosition();
+ assert spans.startPosition() != NO_MORE_POSITIONS;
+ }
+ oneExhaustedInCurrentDoc = false;
+ }
+
+ /** Order the subSpans within the same document by using nextStartPosition on all subSpans
+ * after the first as little as necessary.
+ * Return true when the subSpans could be ordered in this way,
+ * otherwise at least one is exhausted in the current doc.
+ */
+ private boolean stretchToOrder() throws IOException {
+ Spans prevSpans = subSpans.get(0);
+ assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
+ assert prevSpans.endPosition() != NO_MORE_POSITIONS;
+ for (int i = 1; i < subSpans.size(); i++) {
+ Spans spans = subSpans.get(i);
+ assert spans.startPosition() != NO_MORE_POSITIONS;
+ assert spans.endPosition() != NO_MORE_POSITIONS;
+
+ while (prevSpans.endPosition() > spans.startPosition()) { // while overlapping spans
+ if (spans.nextStartPosition() == NO_MORE_POSITIONS) {
+ return false;
+ }
+ }
+ prevSpans = spans;
+ }
+ return true; // all subSpans ordered and non overlapping
}
/** The subSpans are ordered in the same doc, so there is a possible match.
- * Compute the slop while making the match as short as possible by advancing
- * all subSpans except the last one in reverse order.
+ * Compute the slop while making the match as short as possible by using nextStartPosition
+ * on all subSpans, except the last one, in reverse order.
*/
- private boolean shrinkToAfterShortestMatch() throws IOException {
- matchStart = subSpans[subSpans.length - 1].start();
- matchEnd = subSpans[subSpans.length - 1].end();
- Set possibleMatchPayloads = new HashSet<>();
- if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
- possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
- }
+ protected boolean shrinkToAfterShortestMatch() throws IOException {
+ Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
+ matchStart = lastSubSpans.startPosition();
+ matchEnd = lastSubSpans.endPosition();
- Collection possiblePayload = null;
-
int matchSlop = 0;
int lastStart = matchStart;
int lastEnd = matchEnd;
- for (int i = subSpans.length - 2; i >= 0; i--) {
- Spans prevSpans = subSpans[i];
- if (collectPayloads && prevSpans.isPayloadAvailable()) {
- Collection payload = prevSpans.getPayload();
- possiblePayload = new ArrayList<>(payload.size());
- possiblePayload.addAll(payload);
- }
-
- int prevStart = prevSpans.start();
- int prevEnd = prevSpans.end();
- while (true) { // Advance prevSpans until after (lastStart, lastEnd)
- if (! prevSpans.next()) {
- inSameDoc = false;
- more = false;
- break; // Check remaining subSpans for final match.
- } else if (matchDoc != prevSpans.doc()) {
- inSameDoc = false; // The last subSpans is not advanced here.
- break; // Check remaining subSpans for last match in this document.
- } else {
- int ppStart = prevSpans.start();
- int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
- if (! docSpansOrderedNonOverlap(ppStart, ppEnd, lastStart, lastEnd)) {
- break; // Check remaining subSpans.
- } else { // prevSpans still before (lastStart, lastEnd)
- prevStart = ppStart;
- prevEnd = ppEnd;
- if (collectPayloads && prevSpans.isPayloadAvailable()) {
- Collection payload = prevSpans.getPayload();
- possiblePayload = new ArrayList<>(payload.size());
- possiblePayload.addAll(payload);
- }
- }
+ for (int i = subSpans.size() - 2; i >= 0; i--) {
+ Spans prevSpans = subSpans.get(i);
+
+ int prevStart = prevSpans.startPosition();
+ int prevEnd = prevSpans.endPosition();
+ while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
+ if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
+ oneExhaustedInCurrentDoc = true;
+ break; // Check remaining subSpans for match.
}
+ int ppStart = prevSpans.startPosition();
+ int ppEnd = prevSpans.endPosition();
+ if (ppEnd > lastStart) { // if overlapping spans
+ break; // Check remaining subSpans.
+ }
+ // prevSpans still before (lastStart, lastEnd)
+ prevStart = ppStart;
+ prevEnd = ppEnd;
}
- if (collectPayloads && possiblePayload != null) {
- possibleMatchPayloads.addAll(possiblePayload);
- }
-
assert prevStart <= matchStart;
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
matchSlop += (matchStart - prevEnd);
}
/* Do not break on (matchSlop > allowedSlop) here to make sure
- * that subSpans[0] is advanced after the match, if any.
+ * that on return the first subSpans has nextStartPosition called.
*/
matchStart = prevStart;
lastStart = prevStart;
lastEnd = prevEnd;
}
-
+
boolean match = matchSlop <= allowedSlop;
-
- if(collectPayloads && match && possibleMatchPayloads.size() > 0) {
- matchPayload.addAll(possibleMatchPayloads);
- }
return match; // ordered and allowed slop
}
+ /** Start of the current match, or -1 as long as atFirstInCurrentDoc is set. */
+ @Override
+ public int startPosition() {
+   if (atFirstInCurrentDoc) {
+     return -1;
+   }
+   return matchStart;
+ }
+
+ /** End of the current match, or -1 as long as atFirstInCurrentDoc is set. */
+ @Override
+ public int endPosition() {
+   if (atFirstInCurrentDoc) {
+     return -1;
+   }
+   return matchEnd;
+ }
+
+ /** Payloads are not collected by this class: always throws an
+ * UnsupportedOperationException whose message refers to NearSpansPayloadOrdered.
+ */
+ @Override
+ public Collection getPayload() throws IOException {
+ throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
+ }
+
+ /** Payload availability is not tracked by this class: always throws an
+ * UnsupportedOperationException whose message refers to NearSpansPayloadOrdered.
+ */
+ @Override
+ public boolean isPayloadAvailable() {
+ throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
+ }
+
@Override
public String toString() {
- return getClass().getName() + "("+query.toString()+")@"+
- (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
+ return "NearSpansOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java
new file mode 100644
index 00000000000..b2ea4e85679
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java
@@ -0,0 +1,146 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Collection;
+import java.util.Set;
+
+/** A {@link NearSpansOrdered} that allows collecting payloads.
+ * Expert:
+ * Only public for subclassing. Most implementations should not need this class
+ */
+public class NearSpansPayloadOrdered extends NearSpansOrdered {
+
+ private List matchPayload;
+ private Set possibleMatchPayloads;
+
+ public NearSpansPayloadOrdered(SpanNearQuery query, List subSpans)
+ throws IOException {
+ super(query, subSpans);
+ this.matchPayload = new LinkedList<>();
+ this.possibleMatchPayloads = new HashSet<>();
+ }
+
+ /** The subSpans are ordered in the same doc, so there is a possible match.
+ * Compute the slop while making the match as short as possible by using nextStartPosition
+ * on all subSpans, except the last one, in reverse order.
+ * Also collect the payloads.
+ */
+ protected boolean shrinkToAfterShortestMatch() throws IOException {
+ Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
+ matchStart = lastSubSpans.startPosition();
+ matchEnd = lastSubSpans.endPosition();
+
+ matchPayload.clear();
+ possibleMatchPayloads.clear();
+
+ if (lastSubSpans.isPayloadAvailable()) {
+ possibleMatchPayloads.addAll(lastSubSpans.getPayload());
+ }
+
+ Collection possiblePayload = null;
+
+ int matchSlop = 0;
+ int lastStart = matchStart;
+ int lastEnd = matchEnd;
+ for (int i = subSpans.size() - 2; i >= 0; i--) {
+ Spans prevSpans = subSpans.get(i);
+
+ if (prevSpans.isPayloadAvailable()) {
+ Collection payload = prevSpans.getPayload();
+ possiblePayload = new ArrayList<>(payload.size());
+ possiblePayload.addAll(payload);
+ }
+
+ int prevStart = prevSpans.startPosition();
+ int prevEnd = prevSpans.endPosition();
+ while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
+ if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
+ oneExhaustedInCurrentDoc = true;
+ break; // Check remaining subSpans for match.
+ }
+ int ppStart = prevSpans.startPosition();
+ int ppEnd = prevSpans.endPosition();
+ if (ppEnd > lastStart) { // if overlapping spans
+ break; // Check remaining subSpans.
+ }
+ // prevSpans still before (lastStart, lastEnd)
+ prevStart = ppStart;
+ prevEnd = ppEnd;
+ if (prevSpans.isPayloadAvailable()) {
+ Collection payload = prevSpans.getPayload();
+ if (possiblePayload == null) {
+ possiblePayload = new ArrayList<>(payload.size());
+ } else {
+ possiblePayload.clear();
+ }
+ possiblePayload.addAll(payload);
+ }
+ }
+
+ if (possiblePayload != null) {
+ possibleMatchPayloads.addAll(possiblePayload);
+ }
+
+ assert prevStart <= matchStart;
+ if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
+ matchSlop += (matchStart - prevEnd);
+ }
+
+ /* Do not break on (matchSlop > allowedSlop) here to make sure
+ * that on return the first subSpans has nextStartPosition called.
+ */
+ matchStart = prevStart;
+ lastStart = prevStart;
+ lastEnd = prevEnd;
+ }
+
+ boolean match = matchSlop <= allowedSlop;
+
+ if (match && possibleMatchPayloads.size() > 0) {
+ matchPayload.addAll(possibleMatchPayloads);
+ }
+
+ return match; // ordered and allowed slop
+ }
+
+ // TODO: Remove warning after API has been finalized
+ // TODO: Would be nice to be able to lazy load payloads
+ /** Return payloads when available. */
+ @Override
+ public Collection getPayload() throws IOException {
+ return matchPayload;
+ }
+
+ /** Indicates whether payloads are available */
+ @Override
+ public boolean isPayloadAvailable() {
+ return ! matchPayload.isEmpty();
+ }
+
+ @Override
+ public String toString() {
+ return "NearSpansPayloadOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
+ }
+}
+
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
index 168e52d2abf..814f6bdebaa 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
@@ -17,253 +17,225 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-import java.util.Map;
import java.util.Set;
import java.util.HashSet;
/**
* Similar to {@link NearSpansOrdered}, but for the unordered case.
- *
+ *
* Expert:
* Only public for subclassing. Most implementations should not need this class
*/
-public class NearSpansUnordered extends Spans {
- private SpanNearQuery query;
+public class NearSpansUnordered extends NearSpans {
- private List ordered = new ArrayList<>(); // spans in query order
- private Spans[] subSpans;
- private int slop; // from query
+ private List subSpanCells; // in query order
- private SpansCell first; // linked list of spans
- private SpansCell last; // sorted by doc only
+ private SpanPositionQueue spanPositionQueue;
- private int totalLength; // sum of current lengths
+ public NearSpansUnordered(SpanNearQuery query, List subSpans)
+ throws IOException {
+ super(query, subSpans);
- private CellQueue queue; // sorted queue of spans
- private SpansCell max; // max element in queue
+ this.subSpanCells = new ArrayList<>(subSpans.size());
+ for (Spans subSpan : subSpans) { // sub spans in query order
+ this.subSpanCells.add(new SpansCell(subSpan));
+ }
+ spanPositionQueue = new SpanPositionQueue(subSpans.size());
+ singleCellToPositionQueue(); // -1 startPosition/endPosition also at doc -1
+ }
- private boolean more = true; // true iff not done
- private boolean firstTime = true; // true before first next()
+ private void singleCellToPositionQueue() {
+ maxEndPositionCell = subSpanCells.get(0);
+ assert maxEndPositionCell.docID() == -1;
+ assert maxEndPositionCell.startPosition() == -1;
+ spanPositionQueue.add(maxEndPositionCell);
+ }
- private class CellQueue extends PriorityQueue {
- public CellQueue(int size) {
+ private void subSpanCellsToPositionQueue() throws IOException { // used when all subSpanCells arrived at the same doc.
+ spanPositionQueue.clear();
+ for (SpansCell cell : subSpanCells) {
+ assert cell.startPosition() == -1;
+ cell.nextStartPosition();
+ assert cell.startPosition() != NO_MORE_POSITIONS;
+ spanPositionQueue.add(cell);
+ }
+ }
+
+ /** Sum of the current span lengths of all sub spans; kept up to date, together with maxEndPositionCell, by the SpansCell wrappers. */
+ private int totalSpanLength;
+ private SpansCell maxEndPositionCell;
+
+ private class SpansCell extends FilterSpans { // wraps one sub Spans and updates totalSpanLength and maxEndPositionCell of the enclosing object
+ private int spanLength = -1; // length of the current span of this cell, -1 until the first position was seen
+
+ public SpansCell(Spans spans) {
+ super(spans);
+ }
+
+ @Override
+ public int nextStartPosition() throws IOException {
+ int res = in.nextStartPosition();
+ if (res != NO_MORE_POSITIONS) {
+ adjustLength(); // only a real position has a length to account for
+ }
+ adjustMax(); // also after last end position in current doc.
+ return res;
+ }
+
+ private void adjustLength() {
+ if (spanLength != -1) {
+ totalSpanLength -= spanLength; // subtract old, possibly from a previous doc
+ }
+ assert in.startPosition() != NO_MORE_POSITIONS;
+ spanLength = endPosition() - startPosition();
+ assert spanLength >= 0;
+ totalSpanLength += spanLength; // add new
+ }
+
+ private void adjustMax() {
+ assert docID() == maxEndPositionCell.docID();
+ if (endPosition() > maxEndPositionCell.endPosition()) { // strictly greater: on equal end positions the current maximum cell is kept
+ maxEndPositionCell = this;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "NearSpansUnordered.SpansCell(" + in.toString() + ")";
+ }
+ }
+
+
+ private static class SpanPositionQueue extends PriorityQueue {
+ public SpanPositionQueue(int size) {
super(size);
}
-
+
@Override
protected final boolean lessThan(SpansCell spans1, SpansCell spans2) {
- if (spans1.doc() == spans2.doc()) {
- return docSpansOrdered(spans1, spans2);
- } else {
- return spans1.doc() < spans2.doc();
- }
+ return positionsOrdered(spans1, spans2);
}
}
-
- /** Wraps a Spans, and can be used to form a linked list. */
- private class SpansCell extends Spans {
- private Spans spans;
- private SpansCell next;
- private int length = -1;
- private int index;
-
- public SpansCell(Spans spans, int index) {
- this.spans = spans;
- this.index = index;
- }
-
- @Override
- public boolean next() throws IOException {
- return adjust(spans.next());
- }
-
- @Override
- public boolean skipTo(int target) throws IOException {
- return adjust(spans.skipTo(target));
- }
-
- private boolean adjust(boolean condition) {
- if (length != -1) {
- totalLength -= length; // subtract old length
- }
- if (condition) {
- length = end() - start();
- totalLength += length; // add new length
-
- if (max == null || doc() > max.doc()
- || (doc() == max.doc()) && (end() > max.end())) {
- max = this;
- }
- }
- more = condition;
- return condition;
- }
-
- @Override
- public int doc() { return spans.doc(); }
-
- @Override
- public int start() { return spans.start(); }
-
- @Override
- public int end() { return spans.end(); }
- // TODO: Remove warning after API has been finalized
- @Override
- public Collection getPayload() throws IOException {
- return new ArrayList<>(spans.getPayload());
- }
-
- // TODO: Remove warning after API has been finalized
- @Override
- public boolean isPayloadAvailable() throws IOException {
- return spans.isPayloadAvailable();
- }
-
- @Override
- public long cost() {
- return spans.cost();
- }
-
- @Override
- public String toString() { return spans.toString() + "#" + index; }
- }
-
-
- public NearSpansUnordered(SpanNearQuery query, LeafReaderContext context, Bits acceptDocs, Map termContexts)
- throws IOException {
- this.query = query;
- this.slop = query.getSlop();
-
- SpanQuery[] clauses = query.getClauses();
- queue = new CellQueue(clauses.length);
- subSpans = new Spans[clauses.length];
- for (int i = 0; i < clauses.length; i++) {
- SpansCell cell =
- new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i);
- ordered.add(cell);
- subSpans[i] = cell.spans;
- }
- }
- public Spans[] getSubSpans() {
- return subSpans;
- }
- @Override
- public boolean next() throws IOException {
- if (firstTime) {
- initList(true);
- listToQueue(); // initialize queue
- firstTime = false;
- } else if (more) {
- if (min().next()) { // trigger further scanning
- queue.updateTop(); // maintain queue
- } else {
- more = false;
- }
- }
-
- while (more) {
-
- boolean queueStale = false;
-
- if (min().doc() != max.doc()) { // maintain list
- queueToList();
- queueStale = true;
- }
-
- // skip to doc w/ all clauses
-
- while (more && first.doc() < last.doc()) {
- more = first.skipTo(last.doc()); // skip first upto last
- firstToLast(); // and move it to the end
- queueStale = true;
- }
-
- if (!more) return false;
-
- // found doc w/ all clauses
-
- if (queueStale) { // maintain the queue
- listToQueue();
- queueStale = false;
- }
-
- if (atMatch()) {
- return true;
- }
-
- more = min().next();
- if (more) {
- queue.updateTop(); // maintain queue
- }
- }
- return false; // no more matches
- }
-
- @Override
- public boolean skipTo(int target) throws IOException {
- if (firstTime) { // initialize
- initList(false);
- for (SpansCell cell = first; more && cell!=null; cell=cell.next) {
- more = cell.skipTo(target); // skip all
- }
- if (more) {
- listToQueue();
- }
- firstTime = false;
- } else { // normal case
- while (more && min().doc() < target) { // skip as needed
- if (min().skipTo(target)) {
- queue.updateTop();
- } else {
- more = false;
- }
- }
- }
- return more && (atMatch() || next());
- }
-
/** Check whether two Spans in the same document are ordered with possible overlap.
* @return true iff spans1 starts before spans2
* or the spans start at the same position,
* and spans1 ends before spans2.
*/
- static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
- assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
- int start1 = spans1.start();
- int start2 = spans2.start();
- return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
+ static final boolean positionsOrdered(Spans spans1, Spans spans2) {
+ assert spans1.docID() == spans2.docID() : "doc1 " + spans1.docID() + " != doc2 " + spans2.docID();
+ int start1 = spans1.startPosition();
+ int start2 = spans2.startPosition();
+ return (start1 == start2) ? (spans1.endPosition() < spans2.endPosition()) : (start1 < start2);
}
- private SpansCell min() { return queue.top(); }
+ private SpansCell minPositionCell() {
+ return spanPositionQueue.top();
+ }
+
+ private boolean atMatch() {
+ assert minPositionCell().docID() == maxEndPositionCell.docID();
+ return (maxEndPositionCell.endPosition() - minPositionCell().startPosition() - totalSpanLength) <= allowedSlop;
+ }
@Override
- public int doc() { return min().doc(); }
- @Override
- public int start() { return min().start(); }
- @Override
- public int end() { return max.end(); }
+ int toMatchDoc() throws IOException {
+ /* at doc with all subSpans:
+ * search this doc, and when needed following docs of the conjunction, for the first match.
+ */
+ subSpanCellsToPositionQueue();
+ while (true) {
+ if (atMatch()) {
+ atFirstInCurrentDoc = true; // the match is reported by the next nextStartPosition() call
+ oneExhaustedInCurrentDoc = false;
+ return conjunction.docID();
+ }
+ assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
+ if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) { // move the cell with the smallest position forward
+ spanPositionQueue.updateTop(); // restore queue order after the top cell moved
+ }
+ else { // exhausted a subSpan in current doc
+ if (conjunction.nextDoc() == NO_MORE_DOCS) {
+ return NO_MORE_DOCS; // no further doc with all subSpans
+ }
+ // at doc with all subSpans
+ subSpanCellsToPositionQueue();
+ }
+ }
+ }
+
+ @Override
+ boolean twoPhaseCurrentDocMatches() throws IOException {
+ /* at doc with all subSpans:
+ * search only this doc for a first match, never moving to another doc.
+ */
+ subSpanCellsToPositionQueue();
+ while (true) {
+ if (atMatch()) {
+ atFirstInCurrentDoc = true; // the match is reported by the next nextStartPosition() call
+ oneExhaustedInCurrentDoc = false;
+ return true;
+ }
+ assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
+ if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) { // move the cell with the smallest position forward
+ spanPositionQueue.updateTop(); // restore queue order after the top cell moved
+ }
+ else { // exhausted a subSpan in current doc
+ return false;
+ }
+ }
+ }
+
+ @Override
+ public int nextStartPosition() throws IOException {
+ if (atFirstInCurrentDoc) { // a first match was already found for this doc: report it without moving
+ atFirstInCurrentDoc = false;
+ return minPositionCell().startPosition();
+ }
+ while (minPositionCell().startPosition() == -1) { // initially at current doc
+ minPositionCell().nextStartPosition();
+ spanPositionQueue.updateTop();
+ }
+ assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
+ while (true) {
+ if (minPositionCell().nextStartPosition() == NO_MORE_POSITIONS) { // always the cell with the smallest position is moved
+ oneExhaustedInCurrentDoc = true; // makes startPosition()/endPosition() report NO_MORE_POSITIONS
+ return NO_MORE_POSITIONS;
+ }
+ spanPositionQueue.updateTop(); // restore queue order after the top cell moved
+ if (atMatch()) {
+ return minPositionCell().startPosition();
+ }
+ }
+ }
+
+ @Override
+ public int startPosition() {
+ assert minPositionCell() != null;
+ return atFirstInCurrentDoc ? -1
+ : oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS
+ : minPositionCell().startPosition();
+ }
+
+ @Override
+ public int endPosition() {
+ return atFirstInCurrentDoc ? -1
+ : oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS
+ : maxEndPositionCell.endPosition();
+ }
+
- // TODO: Remove warning after API has been finalized
/**
- * WARNING: The List is not necessarily in order of the the positions
+ * WARNING: The List is not necessarily in order of the positions.
* @return Collection of byte[] payloads
* @throws IOException if there is a low-level I/O error
*/
@Override
public Collection getPayload() throws IOException {
Set matchPayload = new HashSet<>();
- for (SpansCell cell = first; cell != null; cell = cell.next) {
+ for (SpansCell cell : subSpanCells) {
if (cell.isPayloadAvailable()) {
matchPayload.addAll(cell.getPayload());
}
@@ -271,78 +243,23 @@ public class NearSpansUnordered extends Spans {
return matchPayload;
}
- // TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() throws IOException {
- SpansCell pointer = min();
- while (pointer != null) {
- if (pointer.isPayloadAvailable()) {
+ for (SpansCell cell : subSpanCells) {
+ if (cell.isPayloadAvailable()) {
return true;
}
- pointer = pointer.next;
}
-
return false;
}
-
- @Override
- public long cost() {
- long minCost = Long.MAX_VALUE;
- for (int i = 0; i < subSpans.length; i++) {
- minCost = Math.min(minCost, subSpans[i].cost());
- }
- return minCost;
- }
@Override
public String toString() {
- return getClass().getName() + "("+query.toString()+")@"+
- (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
- }
-
- private void initList(boolean next) throws IOException {
- for (int i = 0; more && i < ordered.size(); i++) {
- SpansCell cell = ordered.get(i);
- if (next)
- more = cell.next(); // move to first entry
- if (more) {
- addToList(cell); // add to list
- }
+ if (minPositionCell() != null) {
+ return getClass().getName() + "("+query.toString()+")@"+
+ (docID()+":"+startPosition()+"-"+endPosition());
+ } else {
+ return getClass().getName() + "("+query.toString()+")@ ?START?";
}
}
-
- private void addToList(SpansCell cell) {
- if (last != null) { // add next to end of list
- last.next = cell;
- } else
- first = cell;
- last = cell;
- cell.next = null;
- }
-
- private void firstToLast() {
- last.next = first; // move first to end of list
- last = first;
- first = first.next;
- last.next = null;
- }
-
- private void queueToList() {
- last = first = null;
- while (queue.top() != null) {
- addToList(queue.pop());
- }
- }
-
- private void listToQueue() {
- queue.clear(); // rebuild queue
- for (SpansCell cell = first; cell != null; cell = cell.next) {
- queue.add(cell); // add to queue from list
- }
- }
-
- private boolean atMatch() {
- return (min().doc() == max.doc())
- && ((max.end() - min().start() - totalLength) <= slop);
- }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
index 7bcaa2caafd..708b1af3b17 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
@@ -21,9 +21,9 @@ import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
-/**
+/**
* Matches spans near the beginning of a field.
- *
+ *
* This class is a simple extension of {@link SpanPositionRangeQuery} in that it assumes the
* start to be zero and only checks the end boundary.
*/
@@ -37,10 +37,10 @@ public class SpanFirstQuery extends SpanPositionRangeQuery {
@Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
- assert spans.start() != spans.end() : "start equals end: " + spans.start();
- if (spans.start() >= end)
- return AcceptStatus.NO_AND_ADVANCE;
- else if (spans.end() <= end)
+ assert spans.startPosition() != spans.endPosition() : "start equals end: " + spans.startPosition();
+ if (spans.startPosition() >= end)
+ return AcceptStatus.NO_MORE_IN_CURRENT_DOC;
+ else if (spans.endPosition() <= end)
return AcceptStatus.YES;
else
return AcceptStatus.NO;
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java
index aa69146366b..f299e5f8c43 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java
@@ -105,7 +105,7 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {
@Override
public int hashCode() {
- int h = match.hashCode();
+ int h = match.hashCode() ^ getClass().hashCode();
h ^= (h << 8) | (h >>> 25); // reversible
//TODO: is this right?
h ^= payloadToMatch.hashCode();
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
index 1e1d0831058..71b49014133 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
@@ -37,7 +37,8 @@ import org.apache.lucene.util.ToStringUtils;
/** Matches spans which are near one another. One can specify slop, the
* maximum number of intervening unmatched positions, as well as whether
- * matches are required to be in-order. */
+ * matches are required to be in-order.
+ */
public class SpanNearQuery extends SpanQuery implements Cloneable {
protected List clauses;
protected int slop;
@@ -53,22 +54,19 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
* must be in the same order as in clauses and must be non-overlapping.
* When inOrder is false, the spans from each clause
* need not be ordered and may overlap.
- * @param clauses the clauses to find near each other
+ * @param clauses the clauses to find near each other, in the same field, at least 2.
* @param slop The slop value
* @param inOrder true if order is important
*/
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
- this(clauses, slop, inOrder, true);
+ this(clauses, slop, inOrder, true);
}
-
- public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder, boolean collectPayloads) {
- // copy clauses array into an ArrayList
- this.clauses = new ArrayList<>(clauses.length);
- for (int i = 0; i < clauses.length; i++) {
- SpanQuery clause = clauses[i];
- if (field == null) { // check field
- field = clause.getField();
+ public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder, boolean collectPayloads) {
+ this.clauses = new ArrayList<>(clausesIn.length);
+ for (SpanQuery clause : clausesIn) {
+ if (this.field == null) { // check field
+ this.field = clause.getField();
} else if (clause.getField() != null && !clause.getField().equals(field)) {
throw new IllegalArgumentException("Clauses must have same field.");
}
@@ -92,14 +90,13 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
@Override
public String getField() { return field; }
-
+
@Override
public void extractTerms(Set terms) {
for (final SpanQuery clause : clauses) {
clause.extractTerms(terms);
}
- }
-
+ }
@Override
public String toString(String field) {
@@ -124,15 +121,21 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
@Override
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException {
- if (clauses.size() == 0) // optimize 0-clause case
- return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
+ ArrayList subSpans = new ArrayList<>(clauses.size());
- if (clauses.size() == 1) // optimize 1-clause case
- return clauses.get(0).getSpans(context, acceptDocs, termContexts);
-
- return inOrder
- ? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
- : (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
+ for (SpanQuery seq : clauses) {
+ Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
+ if (subSpan != null) {
+ subSpans.add(subSpan);
+ } else {
+ return null; // all required
+ }
+ }
+
+ // all NearSpans require at least two subSpans
+ return (! inOrder) ? new NearSpansUnordered(this, subSpans)
+ : collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
+ : new NearSpansOrdered(this, subSpans);
}
@Override
@@ -148,12 +151,12 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
}
}
if (clone != null) {
- return clone; // some clauses rewrote
+ return clone; // some clauses rewrote
} else {
- return this; // no clauses rewrote
+ return this; // no clauses rewrote
}
}
-
+
@Override
public SpanNearQuery clone() {
int sz = clauses.size();
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
index 88c439d589b..5e1c3e4a2ae 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
@@ -30,9 +30,11 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
+import java.util.Objects;
-/** Removes matches which overlap with another SpanQuery or
- * within a x tokens before or y tokens after another SpanQuery. */
+/** Removes matches which overlap with another SpanQuery or which are
+ * within x tokens before or y tokens after another SpanQuery.
+ */
public class SpanNotQuery extends SpanQuery implements Cloneable {
private SpanQuery include;
private SpanQuery exclude;
@@ -45,20 +47,20 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
this(include, exclude, 0, 0);
}
-
+
/** Construct a SpanNotQuery matching spans from include which
- * have no overlap with spans from exclude within
+ * have no overlap with spans from exclude within
* dist tokens of include. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
this(include, exclude, dist, dist);
}
-
+
/** Construct a SpanNotQuery matching spans from include which
- * have no overlap with spans from exclude within
+ * have no overlap with spans from exclude within
* pre tokens before or post tokens of include. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
- this.include = include;
- this.exclude = exclude;
+ this.include = Objects.requireNonNull(include);
+ this.exclude = Objects.requireNonNull(exclude);
this.pre = (pre >=0) ? pre : 0;
this.post = (post >= 0) ? post : 0;
@@ -96,81 +98,153 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
@Override
public SpanNotQuery clone() {
- SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
- (SpanQuery) exclude.clone(), pre, post);
+ SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery) include.clone(),
+ (SpanQuery) exclude.clone(), pre, post);
spanNotQuery.setBoost(getBoost());
- return spanNotQuery;
+ return spanNotQuery;
}
@Override
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException {
+ Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
+ if (includeSpans == null) {
+ return null;
+ }
+
+ Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
+ if (excludeSpans == null) {
+ return includeSpans;
+ }
+
return new Spans() {
- private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
- private boolean moreInclude = true;
+ private boolean moreInclude = true;
+ private int includeStart = -1;
+ private int includeEnd = -1;
+ private boolean atFirstInCurrentDoc = false;
- private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
- private boolean moreExclude = excludeSpans.next();
+ private boolean moreExclude = excludeSpans.nextDoc() != NO_MORE_DOCS;
+ private int excludeStart = moreExclude ? excludeSpans.nextStartPosition() : NO_MORE_POSITIONS;
- @Override
- public boolean next() throws IOException {
- if (moreInclude) // move to next include
- moreInclude = includeSpans.next();
- while (moreInclude && moreExclude) {
+ @Override
+ public int nextDoc() throws IOException { // moves include to its next doc, then skips include spans that are excluded
+ if (moreInclude) {
+ moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS;
+ if (moreInclude) {
+ atFirstInCurrentDoc = true; // first start position is prefetched below; nextStartPosition() hands it out
+ includeStart = includeSpans.nextStartPosition();
+ assert includeStart != NO_MORE_POSITIONS;
+ }
+ }
+ toNextIncluded(); // does nothing once include or exclude spans are exhausted
+ int res = moreInclude ? includeSpans.docID() : NO_MORE_DOCS;
+ return res;
+ }
- if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
- moreExclude = excludeSpans.skipTo(includeSpans.doc());
-
- while (moreExclude // while exclude is before
- && includeSpans.doc() == excludeSpans.doc()
- && excludeSpans.end() <= includeSpans.start() - pre) {
- moreExclude = excludeSpans.next(); // increment exclude
+ private void toNextIncluded() throws IOException {
+ while (moreInclude && moreExclude) {
+ if (includeSpans.docID() > excludeSpans.docID()) {
+ moreExclude = excludeSpans.advance(includeSpans.docID()) != NO_MORE_DOCS;
+ if (moreExclude) {
+ excludeStart = -1; // only use exclude positions at same doc
}
-
- if (!moreExclude // if no intersection
- || includeSpans.doc() != excludeSpans.doc()
- || includeSpans.end()+post <= excludeSpans.start())
- break; // we found a match
-
- moreInclude = includeSpans.next(); // intersected: keep scanning
}
- return moreInclude;
- }
-
- @Override
- public boolean skipTo(int target) throws IOException {
- if (moreInclude) // skip include
- moreInclude = includeSpans.skipTo(target);
-
- if (!moreInclude)
- return false;
-
- if (moreExclude // skip exclude
- && includeSpans.doc() > excludeSpans.doc())
- moreExclude = excludeSpans.skipTo(includeSpans.doc());
-
- while (moreExclude // while exclude is before
- && includeSpans.doc() == excludeSpans.doc()
- && excludeSpans.end() <= includeSpans.start()-pre) {
- moreExclude = excludeSpans.next(); // increment exclude
+ if (excludeForwardInCurrentDocAndAtMatch()) {
+ break; // at match.
}
- if (!moreExclude // if no intersection
- || includeSpans.doc() != excludeSpans.doc()
- || includeSpans.end()+post <= excludeSpans.start())
- return true; // we found a match
+ // else intersected: keep scanning, to next doc if needed
+ includeStart = includeSpans.nextStartPosition();
+ if (includeStart == NO_MORE_POSITIONS) {
+ moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS;
+ if (moreInclude) {
+ atFirstInCurrentDoc = true;
+ includeStart = includeSpans.nextStartPosition();
+ assert includeStart != NO_MORE_POSITIONS;
+ }
+ }
+ }
+ }
- return next(); // scan to next match
+ private boolean excludeForwardInCurrentDocAndAtMatch() throws IOException { // true when no exclude span blocks the current include span
+ assert moreInclude;
+ assert includeStart != NO_MORE_POSITIONS;
+ if (! moreExclude) { // exclude spans exhausted: nothing left to exclude
+ return true;
+ }
+ if (includeSpans.docID() != excludeSpans.docID()) { // exclude is positioned on another doc
+ return true;
+ }
+ // at same doc
+ if (excludeStart == -1) { // init exclude start position if needed
+ excludeStart = excludeSpans.nextStartPosition();
+ assert excludeStart != NO_MORE_POSITIONS;
+ }
+ while (excludeSpans.endPosition() <= includeStart - pre) {
+ // exclude end position is before a possible exclusion
+ excludeStart = excludeSpans.nextStartPosition();
+ if (excludeStart == NO_MORE_POSITIONS) {
+ return true; // no more exclude at current doc.
+ }
+ }
+ // exclude end position far enough in current doc, check start position:
+ boolean res = includeSpans.endPosition() + post <= excludeStart; // match only if exclude starts at least post positions after include ends
+ return res;
+ }
+
+ @Override
+ public int advance(int target) throws IOException { // like nextDoc(), but first advances include to target
+ if (moreInclude) {
+ assert target > includeSpans.docID() : "target="+target+", includeSpans.docID()="+includeSpans.docID();
+ moreInclude = includeSpans.advance(target) != NO_MORE_DOCS;
+ if (moreInclude) {
+ atFirstInCurrentDoc = true; // first start position is prefetched below; nextStartPosition() hands it out
+ includeStart = includeSpans.nextStartPosition();
+ assert includeStart != NO_MORE_POSITIONS;
+ }
+ }
+ toNextIncluded(); // does nothing once include or exclude spans are exhausted
+ int res = moreInclude ? includeSpans.docID() : NO_MORE_DOCS;
+ return res;
+ }
+
+ @Override
+ public int docID() {
+ int res = includeSpans.docID();
+ return res;
+ }
+
+ @Override
+ public int nextStartPosition() throws IOException {
+ assert moreInclude;
+
+ if (atFirstInCurrentDoc) {
+ atFirstInCurrentDoc = false;
+ assert includeStart != NO_MORE_POSITIONS;
+ return includeStart;
}
- @Override
- public int doc() { return includeSpans.doc(); }
- @Override
- public int start() { return includeSpans.start(); }
- @Override
- public int end() { return includeSpans.end(); }
+ includeStart = includeSpans.nextStartPosition();
+ while ((includeStart != NO_MORE_POSITIONS)
+ && (! excludeForwardInCurrentDocAndAtMatch()))
+ {
+ includeStart = includeSpans.nextStartPosition();
+ }
+
+ return includeStart;
+ }
+
+ @Override
+ public int startPosition() {
+ assert includeStart == includeSpans.startPosition(); // the cached start must mirror the include spans
+ return atFirstInCurrentDoc ? -1 : includeStart; // -1 until nextStartPosition() has been called in the current doc
+ }
+
+ @Override
+ public int endPosition() {
+ return atFirstInCurrentDoc ? -1 : includeSpans.endPosition(); // -1 until nextStartPosition() has been called in the current doc
+ }
- // TODO: Remove warning after API has been finalized
@Override
public Collection getPayload() throws IOException {
ArrayList result = null;
@@ -180,7 +254,6 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
return result;
}
- // TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() throws IOException {
return includeSpans.isPayloadAvailable();
@@ -193,10 +266,9 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
@Override
public String toString() {
- return "spans(" + SpanNotQuery.this.toString() + ")";
- }
-
- };
+ return "spans(" + SpanNotQuery.this.toString() + ")";
+ }
+ };
}
@Override
@@ -230,7 +302,7 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
SpanNotQuery other = (SpanNotQuery)o;
return this.include.equals(other.include)
&& this.exclude.equals(other.exclude)
- && this.pre == other.pre
+ && this.pre == other.pre
&& this.post == other.post;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
index 2b617e49ebc..71215d063cb 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
@@ -35,18 +35,19 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.Query;
-/** Matches the union of its clauses.*/
+/** Matches the union of its clauses.
+ */
public class SpanOrQuery extends SpanQuery implements Cloneable {
private List clauses;
private String field;
- /** Construct a SpanOrQuery merging the provided clauses. */
+ /** Construct a SpanOrQuery merging the provided clauses.
+ * All clauses must have the same field.
+ */
public SpanOrQuery(SpanQuery... clauses) {
-
- // copy clauses array into an ArrayList
this.clauses = new ArrayList<>(clauses.length);
- for (int i = 0; i < clauses.length; i++) {
- addClause(clauses[i]);
+ for (SpanQuery seq : clauses) {
+ addClause(seq);
}
}
@@ -59,7 +60,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
}
this.clauses.add(clause);
}
-
+
/** Return the clauses whose spans are matched. */
public SpanQuery[] getClauses() {
return clauses.toArray(new SpanQuery[clauses.size()]);
@@ -74,7 +75,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
clause.extractTerms(terms);
}
}
-
+
@Override
public SpanOrQuery clone() {
int sz = clauses.size();
@@ -152,90 +153,120 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
@Override
protected final boolean lessThan(Spans spans1, Spans spans2) {
- if (spans1.doc() == spans2.doc()) {
- if (spans1.start() == spans2.start()) {
- return spans1.end() < spans2.end();
+ if (spans1.docID() == spans2.docID()) {
+ if (spans1.startPosition() == spans2.startPosition()) {
+ return spans1.endPosition() < spans2.endPosition();
} else {
- return spans1.start() < spans2.start();
+ return spans1.startPosition() < spans2.startPosition();
}
} else {
- return spans1.doc() < spans2.doc();
+ return spans1.docID() < spans2.docID();
}
}
}
@Override
- public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException {
- if (clauses.size() == 1) // optimize 1-clause case
- return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
+ public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts)
+ throws IOException {
+
+ ArrayList subSpans = new ArrayList<>(clauses.size());
+
+ for (SpanQuery seq : clauses) {
+ Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
+ if (subSpan != null) {
+ subSpans.add(subSpan);
+ }
+ }
+
+ if (subSpans.size() == 0) {
+ return null;
+ } else if (subSpans.size() == 1) {
+ return subSpans.get(0);
+ }
+
+ SpanQueue queue = new SpanQueue(clauses.size());
+ for (Spans spans : subSpans) {
+ queue.add(spans);
+ }
return new Spans() {
- private SpanQueue queue = null;
- private long cost;
- private boolean initSpanQueue(int target) throws IOException {
- queue = new SpanQueue(clauses.size());
- Iterator i = clauses.iterator();
- while (i.hasNext()) {
- Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
- cost += spans.cost();
- if ( ((target == -1) && spans.next())
- || ((target != -1) && spans.skipTo(target))) {
- queue.add(spans);
- }
- }
- return queue.size() != 0;
+ @Override
+ public int nextDoc() throws IOException {
+ if (queue.size() == 0) { // all done
+ return NO_MORE_DOCS;
}
- @Override
- public boolean next() throws IOException {
- if (queue == null) {
- return initSpanQueue(-1);
- }
+ int currentDoc = top().docID();
- if (queue.size() == 0) { // all done
- return false;
- }
+ if (currentDoc == -1) { // initially
+ return advance(0);
+ }
- if (top().next()) { // move to next
+ do {
+ if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
queue.updateTop();
- return true;
- }
-
- queue.pop(); // exhausted a clause
- return queue.size() != 0;
- }
-
- private Spans top() { return queue.top(); }
-
- @Override
- public boolean skipTo(int target) throws IOException {
- if (queue == null) {
- return initSpanQueue(target);
- }
-
- boolean skipCalled = false;
- while (queue.size() != 0 && top().doc() < target) {
- if (top().skipTo(target)) {
- queue.updateTop();
- } else {
- queue.pop();
+ } else {
+ queue.pop(); // exhausted a clause
+ if (queue.size() == 0) {
+ return NO_MORE_DOCS;
}
- skipCalled = true;
}
-
- if (skipCalled) {
- return queue.size() != 0;
+ // assert queue.size() > 0;
+ int doc = top().docID();
+ if (doc > currentDoc) {
+ return doc;
+ }
+ } while (true);
+ }
+
+ private Spans top() {
+ return queue.top();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+
+ while ((queue.size() > 0) && (top().docID() < target)) {
+ if (top().advance(target) != NO_MORE_DOCS) {
+ queue.updateTop();
+ } else {
+ queue.pop();
}
- return next();
}
- @Override
- public int doc() { return top().doc(); }
- @Override
- public int start() { return top().start(); }
- @Override
- public int end() { return top().end(); }
+ return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
+ }
+
+ @Override
+ public int docID() {
+ return (queue == null) ? -1 // NOTE(review): queue is assigned before this Spans is created, so this branch looks unreachable; confirm
+ : (queue.size() > 0) ? top().docID() // doc of the sub-spans currently on top of the queue
+ : NO_MORE_DOCS; // every sub-spans has been popped
+ }
+
+ @Override
+ public int nextStartPosition() throws IOException { // steps the top sub-spans and returns the start position of the new top
+ top().nextStartPosition();
+ queue.updateTop(); // re-order the queue: lessThan compares doc, then start, then end
+ int startPos = top().startPosition();
+ while (startPos == -1) { // new top not yet positioned in this doc: position it as well
+ top().nextStartPosition();
+ queue.updateTop();
+ startPos = top().startPosition();
+ }
+ return startPos;
+ }
+
+ @Override
+ public int startPosition() {
+ return top().startPosition();
+ }
+
+ @Override
+ public int endPosition() {
+ return top().endPosition();
+ }
@Override
public Collection getPayload() throws IOException {
@@ -257,15 +288,23 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
public String toString() {
return "spans("+SpanOrQuery.this+")@"+
((queue == null)?"START"
- :(queue.size()>0?(doc()+":"+start()+"-"+end()):"END"));
- }
+ :(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
+ }
+
+ private long cost = -1; // -1 marks "not computed yet"
@Override
public long cost() {
+ if (cost == -1) { // computed on the first call only, then reused
+ cost = 0;
+ for (Spans spans : subSpans) {
+ cost += spans.cost(); // total is the sum over all non-null sub-spans
+ }
+ }
return cost;
}
-
- };
+
+ };
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java
index dda6009e420..5edfef285db 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java
@@ -28,15 +28,14 @@ import java.util.Iterator;
* Only return those matches that have a specific payload at
* the given position.
*
- * Do not use this with an SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}. Instead, use
- * {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
+ * Do not use this with a SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}.
+ * Instead, use {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
* aren't ordered by {@link org.apache.lucene.search.spans.SpanNearQuery}.
*/
-public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
+public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {
protected final Collection payloadToMatch;
/**
- *
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
* @param payloadToMatch The {@link java.util.Collection} of payloads to match
*/
@@ -71,7 +70,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
}
}
return AcceptStatus.YES;
- }
+ }
@Override
public String toString(String field) {
@@ -108,7 +107,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
@Override
public int hashCode() {
- int h = match.hashCode();
+ int h = match.hashCode() ^ getClass().hashCode();
h ^= (h << 8) | (h >>> 25); // reversible
//TODO: is this right?
h ^= payloadToMatch.hashCode();
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
index a41442d0699..2df1e5e3ff3 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
@@ -25,10 +25,9 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
import java.util.Map;
import java.util.Set;
+import java.util.Objects;
/**
@@ -37,9 +36,8 @@ import java.util.Set;
public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
protected SpanQuery match;
-
public SpanPositionCheckQuery(SpanQuery match) {
- this.match = match;
+ this.match = Objects.requireNonNull(match);
}
/**
@@ -60,42 +58,44 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
match.extractTerms(terms);
}
- /**
+ /**
* Return value for {@link SpanPositionCheckQuery#acceptPosition(Spans)}.
*/
protected static enum AcceptStatus {
/** Indicates the match should be accepted */
YES,
-
+
/** Indicates the match should be rejected */
NO,
-
- /**
- * Indicates the match should be rejected, and the enumeration should advance
- * to the next document.
+
+ /**
+ * Indicates the match should be rejected, and the enumeration may continue
+ * with the next document.
*/
- NO_AND_ADVANCE
+ NO_MORE_IN_CURRENT_DOC
};
-
+
/**
* Implementing classes are required to return whether the current position is a match for the passed in
- * "match" {@link org.apache.lucene.search.spans.SpanQuery}.
+ * "match" {@link SpanQuery}.
*
- * This is only called if the underlying {@link org.apache.lucene.search.spans.Spans#next()} for the
- * match is successful
+ * This is only called if the last call to {@link Spans#nextStartPosition()} on the spans of the
+ * match returned a valid start position.
*
*
- * @param spans The {@link org.apache.lucene.search.spans.Spans} instance, positioned at the spot to check
+ * @param spans The {@link Spans} instance, positioned at the spot to check
+ *
* @return whether the match is accepted, rejected, or rejected and should move to the next doc.
*
- * @see org.apache.lucene.search.spans.Spans#next()
+ * @see Spans#nextDoc()
*
*/
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
@Override
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException {
- return new PositionCheckSpan(context, acceptDocs, termContexts);
+ Spans matchSpans = match.getSpans(context, acceptDocs, termContexts);
+ return (matchSpans == null) ? null : new PositionCheckSpans(matchSpans); // null from the wrapped query is passed through as null
}
@@ -116,79 +116,110 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
}
}
- protected class PositionCheckSpan extends Spans {
- private Spans spans;
+ protected class PositionCheckSpans extends FilterSpans {
- public PositionCheckSpan(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException {
- spans = match.getSpans(context, acceptDocs, termContexts);
+ private boolean atFirstInCurrentDoc = false;
+ private int startPos = -1;
+
+ public PositionCheckSpans(Spans matchSpans) throws IOException {
+ super(matchSpans);
}
@Override
- public boolean next() throws IOException {
- if (!spans.next())
- return false;
-
- return doNext();
+ public int nextDoc() throws IOException {
+ if (in.nextDoc() == NO_MORE_DOCS)
+ return NO_MORE_DOCS;
+
+ return toNextDocWithAllowedPosition();
}
@Override
- public boolean skipTo(int target) throws IOException {
- if (!spans.skipTo(target))
- return false;
+ public int advance(int target) throws IOException {
+ if (in.advance(target) == NO_MORE_DOCS)
+ return NO_MORE_DOCS;
- return doNext();
+ return toNextDocWithAllowedPosition();
}
-
- protected boolean doNext() throws IOException {
+
+ @SuppressWarnings("fallthrough")
+ protected int toNextDocWithAllowedPosition() throws IOException {
+ startPos = in.nextStartPosition();
+ assert startPos != NO_MORE_POSITIONS;
for (;;) {
switch(acceptPosition(this)) {
- case YES: return true;
- case NO:
- if (!spans.next())
- return false;
- break;
- case NO_AND_ADVANCE:
- if (!spans.skipTo(spans.doc()+1))
- return false;
+ case YES:
+ atFirstInCurrentDoc = true;
+ return in.docID();
+ case NO:
+ startPos = in.nextStartPosition();
+ if (startPos != NO_MORE_POSITIONS) {
+ break;
+ }
+ // else fallthrough
+ case NO_MORE_IN_CURRENT_DOC:
+ if (in.nextDoc() == NO_MORE_DOCS) {
+ startPos = -1;
+ return NO_MORE_DOCS;
+ }
+ startPos = in.nextStartPosition();
+ assert startPos != NO_MORE_POSITIONS : "no start position at doc="+in.docID();
break;
}
}
}
@Override
- public int doc() { return spans.doc(); }
-
- @Override
- public int start() { return spans.start(); }
-
- @Override
- public int end() { return spans.end(); }
- // TODO: Remove warning after API has been finalized
-
- @Override
- public Collection getPayload() throws IOException {
- ArrayList result = null;
- if (spans.isPayloadAvailable()) {
- result = new ArrayList<>(spans.getPayload());
+ public int nextStartPosition() throws IOException {
+ if (atFirstInCurrentDoc) {
+ atFirstInCurrentDoc = false;
+ return startPos;
}
- return result;//TODO: any way to avoid the new construction?
- }
- // TODO: Remove warning after API has been finalized
- @Override
- public boolean isPayloadAvailable() throws IOException {
- return spans.isPayloadAvailable();
+ for (;;) {
+ startPos = in.nextStartPosition();
+ if (startPos == NO_MORE_POSITIONS) {
+ return NO_MORE_POSITIONS;
+ }
+ switch(acceptPosition(this)) {
+ case YES:
+ return startPos;
+ case NO:
+ break;
+ case NO_MORE_IN_CURRENT_DOC:
+ return startPos = NO_MORE_POSITIONS; // startPos ahead for the current doc.
+ }
+ }
}
@Override
- public long cost() {
- return spans.cost();
+ public int startPosition() {
+ return atFirstInCurrentDoc ? -1 : startPos;
+ }
+
+ @Override
+ public int endPosition() {
+ return atFirstInCurrentDoc ? -1
+ : (startPos != NO_MORE_POSITIONS) ? in.endPosition() : NO_MORE_POSITIONS;
}
@Override
public String toString() {
- return "spans(" + SpanPositionCheckQuery.this.toString() + ")";
- }
+ return "spans(" + SpanPositionCheckQuery.this.toString() + ")";
+ }
+ }
+ /** Returns true iff o has exactly the same class as this and an equal match query. */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null) return false;
+ if (getClass() != o.getClass()) return false; // exact class comparison: instances of different subclasses are never equal
+ final SpanPositionCheckQuery spcq = (SpanPositionCheckQuery) o;
+ return match.equals(spcq.match);
+ }
+
+ @Override
+ public int hashCode() {
+ return match.hashCode() ^ getClass().hashCode(); // mixes in the class, matching the exact-class check in equals()
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java
index f588d281058..3da4e1ae222 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java
@@ -25,10 +25,10 @@ import java.io.IOException;
/**
* Checks to see if the {@link #getMatch()} lies between a start and end position
*
- * @see org.apache.lucene.search.spans.SpanFirstQuery for a derivation that is optimized for the case where start position is 0
+ * See {@link SpanFirstQuery} for a derivation that is optimized for the case where start position is 0.
*/
public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
- protected int start = 0;
+ protected int start;
protected int end;
public SpanPositionRangeQuery(SpanQuery match, int start, int end) {
@@ -40,13 +40,12 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
@Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
- assert spans.start() != spans.end();
- if (spans.start() >= end)
- return AcceptStatus.NO_AND_ADVANCE;
- else if (spans.start() >= start && spans.endPosition() <= end)
- return AcceptStatus.YES;
- else
- return AcceptStatus.NO;
+ assert spans.startPosition() != spans.endPosition();
+ AcceptStatus res = (spans.startPosition() >= end)
+ ? AcceptStatus.NO_MORE_IN_CURRENT_DOC // span starts at or past end: reject and stop looking in this doc
+ : (spans.startPosition() >= start && spans.endPosition() <= end)
+ ? AcceptStatus.YES : AcceptStatus.NO; // YES only when the span lies entirely within [start, end]
+ return res;
}
@@ -96,7 +95,7 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
@Override
public int hashCode() {
- int h = match.hashCode();
+ int h = match.hashCode() ^ getClass().hashCode();
h ^= (h << 8) | (h >>> 25); // reversible
h ^= Float.floatToRawIntBits(getBoost()) ^ end ^ start;
return h;
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java
index 00bed758488..7c2687aa8e5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java
@@ -25,16 +25,17 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
/** Base class for span-based queries. */
public abstract class SpanQuery extends Query {
- /** Expert: Returns the matches for this query in an index. Used internally
- * to search for spans. */
+ /** Expert: Returns the matches for this query in an index.
+ * Used internally to search for spans.
+ * This may return null to indicate that the SpanQuery has no results.
+ */
public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException;
- /**
+ /**
* Returns the name of the field matched by this query.
*
* Note that this may return null if the query matches no terms.
@@ -42,7 +43,7 @@ public abstract class SpanQuery extends Query {
public abstract String getField();
@Override
- public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new SpanWeight(this, searcher);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
index 56b25713aa6..3c9a90eb7ac 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
@@ -18,9 +18,9 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
+import java.util.Objects;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
/**
@@ -29,58 +29,68 @@ import org.apache.lucene.search.similarities.Similarity;
public class SpanScorer extends Scorer {
protected Spans spans;
- protected boolean more = true;
-
protected int doc;
protected float freq;
protected int numMatches;
protected final Similarity.SimScorer docScorer;
-
- protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
+
+ protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
throws IOException {
super(weight);
- this.docScorer = docScorer;
- this.spans = spans;
-
- doc = -1;
- more = spans.next();
+ this.docScorer = Objects.requireNonNull(docScorer);
+ this.spans = Objects.requireNonNull(spans);
+ this.doc = -1;
}
@Override
public int nextDoc() throws IOException {
- if (!setFreqCurrentDoc()) {
- doc = NO_MORE_DOCS;
+ int prevDoc = doc; // NOTE(review): prevDoc is never read in this method; candidate for removal
+ doc = spans.nextDoc();
+ if (doc != NO_MORE_DOCS) {
+ setFreqCurrentDoc(); // walks all positions of the new doc to fill freq and numMatches
}
return doc;
}
@Override
public int advance(int target) throws IOException {
- if (!more) {
- return doc = NO_MORE_DOCS;
- }
- if (spans.doc() < target) { // setFreqCurrentDoc() leaves spans.doc() ahead
- more = spans.skipTo(target);
- }
- if (!setFreqCurrentDoc()) {
- doc = NO_MORE_DOCS;
+ int prevDoc = doc; // NOTE(review): prevDoc is never read in this method; candidate for removal
+ doc = spans.advance(target);
+ if (doc != NO_MORE_DOCS) {
+ setFreqCurrentDoc(); // walks all positions of the new doc to fill freq and numMatches
}
return doc;
}
-
+
protected boolean setFreqCurrentDoc() throws IOException {
- if (!more) {
- return false;
- }
- doc = spans.doc();
freq = 0.0f;
numMatches = 0;
+
+ assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans;
+ assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans;
+ int prevStartPos = -1;
+ int prevEndPos = -1;
+
+ int startPos = spans.nextStartPosition(); // first match of the current doc
+ assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
do {
- int matchLength = spans.end() - spans.start();
- freq += docScorer.computeSlopFactor(matchLength);
+ assert startPos >= prevStartPos; // matches must arrive in non-decreasing start order
+ int endPos = spans.endPosition();
+ assert endPos != Spans.NO_MORE_POSITIONS;
+ // This assertion can fail for Or spans on the same term:
+ // assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos;
+ assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos;
numMatches++;
- more = spans.next();
- } while (more && (doc == spans.doc()));
+ int matchLength = endPos - startPos;
+ freq += docScorer.computeSlopFactor(matchLength); // accumulate the slop factor of this match length into freq
+ prevStartPos = startPos;
+ prevEndPos = endPos;
+ startPos = spans.nextStartPosition();
+ } while (startPos != Spans.NO_MORE_POSITIONS); // until the current doc has no more matches
+
+ assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
+ assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
+
return true;
}
@@ -89,15 +99,16 @@ public class SpanScorer extends Scorer {
@Override
public float score() throws IOException {
- return docScorer.score(doc, freq);
+ float s = docScorer.score(doc, freq);
+ return s;
}
-
+
@Override
public int freq() throws IOException {
return numMatches;
}
- /** Returns the intermediate "sloppy freq" adjusted for edit distance
+ /** Returns the intermediate "sloppy freq" adjusted for edit distance
* @lucene.internal */
// only public so .payloads can see it.
public float sloppyFreq() throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
index 2dc79ff6377..caa3963cc41 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
@@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
+import java.util.Objects;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.LeafReaderContext;
@@ -31,19 +32,23 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
-/** Matches spans containing a term. */
+/** Matches spans containing a term.
+ * This should not be used for terms that are indexed at position Integer.MAX_VALUE.
+ */
public class SpanTermQuery extends SpanQuery {
protected Term term;
/** Construct a SpanTermQuery matching the named term's spans. */
- public SpanTermQuery(Term term) { this.term = term; }
+ public SpanTermQuery(Term term) {
+ this.term = Objects.requireNonNull(term); // fail fast: hashCode() and equals() dereference term without a null check
+ }
/** Return the term whose spans are matched. */
public Term getTerm() { return term; }
@Override
public String getField() { return term.field(); }
-
+
@Override
public void extractTerms(Set terms) {
terms.add(term);
@@ -64,7 +69,7 @@ public class SpanTermQuery extends SpanQuery {
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
- result = prime * result + ((term == null) ? 0 : term.hashCode());
+ result = prime * result + term.hashCode();
return result;
}
@@ -77,12 +82,7 @@ public class SpanTermQuery extends SpanQuery {
if (getClass() != obj.getClass())
return false;
SpanTermQuery other = (SpanTermQuery) obj;
- if (term == null) {
- if (other.term != null)
- return false;
- } else if (!term.equals(other.term))
- return false;
- return true;
+ return term.equals(other.term);
}
@Override
@@ -99,7 +99,7 @@ public class SpanTermQuery extends SpanQuery {
}
final TermsEnum termsEnum = terms.iterator(null);
- if (termsEnum.seekExact(term.bytes())) {
+ if (termsEnum.seekExact(term.bytes())) {
state = termsEnum.termState();
} else {
state = null;
@@ -110,14 +110,14 @@ public class SpanTermQuery extends SpanQuery {
} else {
state = termContext.get(context.ord);
}
-
+
if (state == null) { // term is not present in that reader
- return TermSpans.EMPTY_TERM_SPANS;
+ return null;
}
-
+
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state);
-
+
final PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.PAYLOADS);
return new TermSpans(postings, term);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
index c172243e86c..b7f1b288a51 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
@@ -51,7 +51,7 @@ public class SpanWeight extends Weight {
super(query);
this.similarity = searcher.getSimilarity();
this.query = query;
-
+
termContexts = new HashMap<>();
TreeSet terms = new TreeSet<>();
query.extractTerms(terms);
@@ -66,8 +66,8 @@ public class SpanWeight extends Weight {
}
final String field = query.getField();
if (field != null) {
- stats = similarity.computeWeight(query.getBoost(),
- searcher.collectionStatistics(query.getField()),
+ stats = similarity.computeWeight(query.getBoost(),
+ searcher.collectionStatistics(query.getField()),
termStats);
}
}
@@ -88,9 +88,9 @@ public class SpanWeight extends Weight {
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
if (stats == null) {
return null;
- } else {
- return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context));
}
+ Spans spans = query.getSpans(context, acceptDocs, termContexts);
+ return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context));
}
@Override
@@ -106,11 +106,11 @@ public class SpanWeight extends Weight {
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
result.addDetail(scoreExplanation);
result.setValue(scoreExplanation.getValue());
- result.setMatch(true);
+ result.setMatch(true);
return result;
}
}
-
+
return new ComplexExplanation(false, 0.0f, "no matching term");
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
index 32aff3b2879..ea8bf8a5583 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
@@ -20,54 +20,44 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Collection;
-/** Expert: an enumeration of span matches. Used to implement span searching.
- * Each span represents a range of term positions within a document. Matches
- * are enumerated in order, by increasing document number, within that by
- * increasing start position and finally by increasing end position. */
-public abstract class Spans {
- /** Move to the next match, returning true iff any such exists. */
- public abstract boolean next() throws IOException;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TwoPhaseIterator;
- /** Skips to the first match beyond the current, whose document number is
- * greater than or equal to target.
- *
The behavior of this method is undefined when called with
- * target ≤ current, or after the iterator has exhausted.
- * Both cases may result in unpredicted behavior.
- *
- * Most implementations are considerably more efficient than that.
- */
- public abstract boolean skipTo(int target) throws IOException;
+/** Iterates through combinations of start/end positions per-doc.
+ * Each start/end position represents a range of term positions within the current document.
+ * These are enumerated in order, by increasing document number, within that by
+ * increasing start position and finally by increasing end position.
+ */
+public abstract class Spans extends DocIdSetIterator {
+ public static final int NO_MORE_POSITIONS = Integer.MAX_VALUE;
- /** Returns the document number of the current match. Initially invalid. */
- public abstract int doc();
-
- /** Returns the start position of the current match. Initially invalid. */
- public abstract int start();
-
- /** Returns the end position of the current match. Initially invalid. */
- public abstract int end();
-
/**
- * Returns the payload data for the current span.
- * This is invalid until {@link #next()} is called for
- * the first time.
+ * Returns the next start position for the current doc.
+ * There is always at least one start/end position per doc.
+ * After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
+ */
+ public abstract int nextStartPosition() throws IOException;
+
+ /**
+ * Returns the start position in the current doc, or -1 when {@link #nextStartPosition} was not yet called on the current doc.
+ * After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
+ */
+ public abstract int startPosition();
+
+ /**
+ * Returns the end position for the current start position, or -1 when {@link #nextStartPosition} was not yet called on the current doc.
+ * After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
+ */
+ public abstract int endPosition();
+
+ /**
+ * Returns the payload data for the current start/end position.
+ * This is only valid after {@link #nextStartPosition()}
+ * returned an available start position.
* This method must not be called more than once after each call
- * of {@link #next()}. However, most payloads are loaded lazily,
+ * of {@link #nextStartPosition()}. However, most payloads are loaded lazily,
* so if the payload data for the current position is not needed,
- * this method may not be called at all for performance reasons. An ordered
- * SpanQuery does not lazy load, so if you have payloads in your index and
- * you do not want ordered SpanNearQuerys to collect payloads, you can
- * disable collection with a constructor option.
+ * this method may not be called at all for performance reasons.
*
* Note that the return type is a collection, thus the ordering should not be relied upon.
*
@@ -76,25 +66,35 @@ public abstract class Spans {
* @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
* @throws IOException if there is a low-level I/O error
*/
- // TODO: Remove warning after API has been finalized
public abstract Collection getPayload() throws IOException;
/**
- * Checks if a payload can be loaded at this position.
+ * Checks if a payload can be loaded at the current start/end position.
*
* Payloads can only be loaded once per call to
- * {@link #next()}.
+ * {@link #nextStartPosition()}.
*
- * @return true if there is a payload available at this position that can be loaded
+ * @return true if there is a payload available at this start/end position
+ * that can be loaded
*/
public abstract boolean isPayloadAvailable() throws IOException;
-
+
/**
- * Returns the estimated cost of this spans.
- *
- * This is generally an upper bound of the number of documents this iterator
- * might match, but may be a rough heuristic, hardcoded value, or otherwise
- * completely inaccurate.
+ * Optional method: Return a {@link TwoPhaseIterator} view of this
+ * {@link Spans}. A return value of {@code null} indicates that
+ * two-phase iteration is not supported.
+ *
+ * Note that the returned {@link TwoPhaseIterator}'s
+ * {@link TwoPhaseIterator#approximation() approximation} must
+ * advance synchronously with this iterator: advancing the approximation must
+ * advance this iterator and vice-versa.
+ *
+ * Implementing this method is typically useful on {@link Spans}s
+ * that have a high per-document overhead in order to confirm matches.
+ *
+ * The default implementation returns {@code null}.
*/
- public abstract long cost();
+ public TwoPhaseIterator asTwoPhaseIterator() {
+ return null;
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
index bca88de5ef2..5351b3d5513 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
@@ -24,10 +24,12 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collections;
import java.util.Collection;
+import java.util.Objects;
/**
* Expert:
- * Public for extension only
+ * Public for extension only.
+ * This does not work correctly for terms that are indexed at position Integer.MAX_VALUE.
*/
public class TermSpans extends Spans {
protected final PostingsEnum postings;
@@ -39,65 +41,67 @@ public class TermSpans extends Spans {
protected boolean readPayload;
public TermSpans(PostingsEnum postings, Term term) {
- this.postings = postings;
- this.term = term;
- doc = -1;
- }
-
- // only for EmptyTermSpans (below)
- TermSpans() {
- term = null;
- postings = null;
+ this.postings = Objects.requireNonNull(postings);
+ this.term = Objects.requireNonNull(term);
+ this.doc = -1;
+ this.position = -1;
}
@Override
- public boolean next() throws IOException {
- if (count == freq) {
- if (postings == null) {
- return false;
- }
- doc = postings.nextDoc();
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- return false;
- }
+ public int nextDoc() throws IOException {
+ doc = postings.nextDoc();
+ if (doc != DocIdSetIterator.NO_MORE_DOCS) {
freq = postings.freq();
+ assert freq >= 1;
count = 0;
}
- position = postings.nextPosition();
- count++;
- readPayload = false;
- return true;
- }
-
- @Override
- public boolean skipTo(int target) throws IOException {
- assert target > doc;
- doc = postings.advance(target);
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- return false;
- }
-
- freq = postings.freq();
- count = 0;
- position = postings.nextPosition();
- count++;
- readPayload = false;
- return true;
- }
-
- @Override
- public int doc() {
+ position = -1;
return doc;
}
@Override
- public int start() {
+ public int advance(int target) throws IOException {
+ assert target > doc;
+ doc = postings.advance(target);
+ if (doc != DocIdSetIterator.NO_MORE_DOCS) {
+ freq = postings.freq();
+ assert freq >= 1;
+ count = 0;
+ }
+ position = -1;
+ return doc;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int nextStartPosition() throws IOException {
+ if (count == freq) {
+ assert position != NO_MORE_POSITIONS;
+ return position = NO_MORE_POSITIONS;
+ }
+ int prevPosition = position;
+ position = postings.nextPosition();
+ assert position >= prevPosition : "prevPosition="+prevPosition+" > position="+position;
+ assert position != NO_MORE_POSITIONS; // int endPosition not possible
+ count++;
+ readPayload = false;
return position;
}
@Override
- public int end() {
- return position + 1;
+ public int startPosition() {
+ return position;
+ }
+
+ @Override
+ public int endPosition() {
+ return (position == -1) ? -1
+ : (position != NO_MORE_POSITIONS) ? position + 1
+ : NO_MORE_POSITIONS;
}
@Override
@@ -105,7 +109,6 @@ public class TermSpans extends Spans {
return postings.cost();
}
- // TODO: Remove warning after API has been finalized
@Override
public Collection getPayload() throws IOException {
final BytesRef payload = postings.getPayload();
@@ -120,7 +123,6 @@ public class TermSpans extends Spans {
return Collections.singletonList(bytes);
}
- // TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() throws IOException {
return readPayload == false && postings.getPayload() != null;
@@ -129,55 +131,12 @@ public class TermSpans extends Spans {
@Override
public String toString() {
return "spans(" + term.toString() + ")@" +
- (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position);
+ (doc == -1 ? "START" : (doc == NO_MORE_DOCS) ? "ENDDOC"
+ : doc + " - " + (position == NO_MORE_POSITIONS ? "ENDPOS" : position));
}
public PostingsEnum getPostings() {
return postings;
}
- private static final class EmptyTermSpans extends TermSpans {
-
- @Override
- public boolean next() {
- return false;
- }
-
- @Override
- public boolean skipTo(int target) {
- return false;
- }
-
- @Override
- public int doc() {
- return DocIdSetIterator.NO_MORE_DOCS;
- }
-
- @Override
- public int start() {
- return -1;
- }
-
- @Override
- public int end() {
- return -1;
- }
-
- @Override
- public Collection getPayload() {
- return null;
- }
-
- @Override
- public boolean isPayloadAvailable() {
- return false;
- }
-
- @Override
- public long cost() {
- return 0;
- }
- }
-
- public static final TermSpans EMPTY_TERM_SPANS = new EmptyTermSpans();
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java b/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java
index 20f20b0e626..8e98eb13812 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java
@@ -18,14 +18,18 @@
/**
* The calculus of spans.
*
- *
A span is a <doc,startPosition,endPosition> tuple.
+ *
A span is a <doc,startPosition,endPosition> tuple that is enumerated by
+ * class {@link org.apache.lucene.search.spans.Spans Spans}.
+ *
*
*
The following span query operators are implemented:
*
*
*
*
A {@link org.apache.lucene.search.spans.SpanTermQuery SpanTermQuery} matches all spans
- * containing a particular {@link org.apache.lucene.index.Term Term}.
+ * containing a particular {@link org.apache.lucene.index.Term Term}.
+ * This should not be used for terms that are indexed at position Integer.MAX_VALUE.
+ *
*
*
A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} matches spans
* which occur near one another, and can be used to implement things like
diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java
index d57c284df1f..74d7b7fa09f 100644
--- a/lucene/core/src/java/org/apache/lucene/util/Version.java
+++ b/lucene/core/src/java/org/apache/lucene/util/Version.java
@@ -46,6 +46,13 @@ public final class Version {
@Deprecated
public static final Version LUCENE_5_1_0 = new Version(5, 1, 0);
+ /**
+ * Match settings and bugs in Lucene's 5.2.0 release.
+ * @deprecated Use latest
+ */
+ @Deprecated
+ public static final Version LUCENE_5_2_0 = new Version(5, 2, 0);
+
/** Match settings and bugs in Lucene's 6.0 release.
*
* Use this to get the latest & greatest settings, bug
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java b/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
index b9b1f22c94c..ba8c8b746b0 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
@@ -217,6 +217,9 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
public void testNoWaitClose() throws IOException {
Directory directory = newDirectory();
+ if (directory instanceof MockDirectoryWrapper) {
+ ((MockDirectoryWrapper) directory).setPreventDoubleWrite(false);
+ }
Document doc = new Document();
Field idField = newStringField("id", "", Field.Store.YES);
doc.add(idField);
@@ -248,7 +251,6 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
// stress out aborting them on close:
((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3);
writer.addDocument(doc);
- writer.commit();
try {
writer.commit();
@@ -267,7 +269,8 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
setOpenMode(OpenMode.APPEND).
setMergePolicy(newLogMergePolicy(100)).
// Force excessive merging:
- setMaxBufferedDocs(2)
+ setMaxBufferedDocs(2).
+ setCommitOnClose(false)
);
}
writer.close();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java
index 1057e4ffff0..28ce0d7eb58 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java
@@ -172,7 +172,7 @@ public class TestFieldsReader extends LuceneTestCase {
try {
i.seek(getFilePointer());
} catch (IOException e) {
- throw new RuntimeException();
+ throw new RuntimeException(e);
}
return i;
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java
new file mode 100644
index 00000000000..10023506298
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java
@@ -0,0 +1,71 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestIndexWriterDeleteByQuery extends LuceneTestCase {
+
+ // LUCENE-6379
+ public void testDeleteMatchAllDocsQuery() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+ Document doc = new Document();
+ // Norms are disabled:
+ doc.add(newStringField("field", "foo", Field.Store.NO));
+ w.addDocument(doc);
+ DirectoryReader r = DirectoryReader.open(w, true);
+ FieldInfo fi = MultiFields.getMergedFieldInfos(r).fieldInfo("field");
+ assertNotNull(fi);
+ assertFalse(fi.hasNorms());
+ assertEquals(1, r.numDocs());
+ assertEquals(1, r.maxDoc());
+
+ w.deleteDocuments(new MatchAllDocsQuery());
+ DirectoryReader r2 = DirectoryReader.openIfChanged(r);
+ r.close();
+
+ assertNotNull(r2);
+ assertEquals(0, r2.numDocs());
+ assertEquals(0, r2.maxDoc());
+
+ // Confirm the omitNorms bit is in fact no longer set:
+ doc = new Document();
+ // Norms are enabled:
+ doc.add(newTextField("field", "foo", Field.Store.NO));
+ w.addDocument(doc);
+
+ DirectoryReader r3 = DirectoryReader.openIfChanged(r2);
+ r2.close();
+ assertNotNull(r3);
+ assertEquals(1, r3.numDocs());
+ assertEquals(1, r3.maxDoc());
+
+ // Make sure norms can come back to life for a field after deleting by MatchAllDocsQuery:
+ fi = MultiFields.getMergedFieldInfos(r3).fieldInfo("field");
+ assertNotNull(fi);
+ assertTrue(fi.hasNorms());
+ r3.close();
+ w.close();
+ dir.close();
+ }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java
index 710827325d1..dc1b2f308cd 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java
@@ -238,18 +238,20 @@ public class TestPositionIncrement extends LuceneTestCase {
if (VERBOSE) {
System.out.println("\ngetPayloadSpans test");
}
- Spans pspans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
- while (pspans.next()) {
- if (VERBOSE) {
- System.out.println("doc " + pspans.doc() + ": span " + pspans.start()
- + " to " + pspans.end());
- }
- Collection payloads = pspans.getPayload();
- sawZero |= pspans.start() == 0;
- for (byte[] bytes : payloads) {
- count++;
+ Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
+ while (pspans.nextDoc() != Spans.NO_MORE_DOCS) {
+ while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if (VERBOSE) {
- System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
+ System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition()
+ + " to " + pspans.endPosition());
+ }
+ Collection payloads = pspans.getPayload();
+ sawZero |= pspans.startPosition() == 0;
+ for (byte[] bytes : payloads) {
+ count++;
+ if (VERBOSE) {
+ System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
+ }
}
}
}
@@ -257,20 +259,20 @@ public class TestPositionIncrement extends LuceneTestCase {
assertEquals(5, count);
// System.out.println("\ngetSpans test");
- Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
+ Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
count = 0;
sawZero = false;
- while (spans.next()) {
- count++;
- sawZero |= spans.start() == 0;
- // System.out.println(spans.doc() + " - " + spans.start() + " - " +
- // spans.end());
+ while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
+ while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
+ count++;
+ sawZero |= spans.startPosition() == 0;
+ // System.out.println(spans.docID() + " - " + spans.startPosition() + " - " +
+ // spans.endPosition());
+ }
}
assertEquals(4, count);
assertTrue(sawZero);
- // System.out.println("\nPayloadSpanUtil test");
-
sawZero = false;
PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext());
Collection pls = psu.getPayloadsForQuery(snq);
diff --git a/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java b/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
index 80fe83fc6cf..9f9f887de45 100644
--- a/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
@@ -160,7 +160,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
}
CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
- Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
+ Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
assertTrue("spans is null and it shouldn't be", spans != null);
/*float score = hits.score(0);
for (int i =1; i < hits.length(); i++)
@@ -211,13 +211,15 @@ public class TestPayloadTermQuery extends LuceneTestCase {
}
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
CheckHits.checkExplanations(query, "field", searcher, true);
- Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
+ Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
assertTrue("spans is null and it shouldn't be", spans != null);
//should be two matches per document
int count = 0;
//100 hits times 2 matches per hit, we should have 200 in count
- while (spans.next()) {
- count++;
+ while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
+ while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
+ count++;
+ }
}
assertTrue(count + " does not equal: " + 200, count == 200);
}
@@ -253,13 +255,15 @@ public class TestPayloadTermQuery extends LuceneTestCase {
}
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
CheckHits.checkExplanations(query, "field", searcher, true);
- Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
+ Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
assertTrue("spans is null and it shouldn't be", spans != null);
//should be two matches per document
int count = 0;
//100 hits times 2 matches per hit, we should have 200 in count
- while (spans.next()) {
- count++;
+ while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
+ while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
+ count++;
+ }
}
reader.close();
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
index 56afd7eb027..1fec61daf28 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
@@ -24,7 +24,6 @@ import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
@@ -42,27 +41,32 @@ final class JustCompileSearchSpans {
static final class JustCompileSpans extends Spans {
@Override
- public int doc() {
+ public int docID() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
- public int end() {
+ public int nextDoc() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
- public boolean next() {
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
+
+ @Override
+ public int startPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
- public boolean skipTo(int target) {
+ public int endPosition() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
-
+
@Override
- public int start() {
+ public int nextStartPosition() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@@ -103,6 +107,36 @@ final class JustCompileSearchSpans {
static final class JustCompilePayloadSpans extends Spans {
+ @Override
+ public int docID() {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
+
+ @Override
+ public int startPosition() {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
+
+ @Override
+ public int endPosition() {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
+
+ @Override
+ public int nextStartPosition() throws IOException {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
+
@Override
public Collection getPayload() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@@ -113,31 +147,6 @@ final class JustCompileSearchSpans {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
- @Override
- public int doc() {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- @Override
- public int end() {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- @Override
- public boolean next() {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- @Override
- public boolean skipTo(int target) {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- @Override
- public int start() {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
@Override
public long cost() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@@ -147,7 +156,7 @@ final class JustCompileSearchSpans {
static final class JustCompileSpanScorer extends SpanScorer {
- protected JustCompileSpanScorer(Spans spans, Weight weight,
+ protected JustCompileSpanScorer(Spans spans, SpanWeight weight,
Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java b/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java
index 7490c61466e..3c20d6b73ef 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java
@@ -18,19 +18,18 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
import java.util.HashMap;
-import java.util.List;
+import java.util.HashSet;
import java.util.Map;
-import java.util.TreeSet;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.Bits;
/**
*
@@ -39,141 +38,20 @@ import org.apache.lucene.search.DocIdSetIterator;
* NOTE: This should be used for testing purposes only
* @lucene.internal
*/
-public class MultiSpansWrapper extends Spans { // can't be package private due to payloads
+public class MultiSpansWrapper {
- private SpanQuery query;
- private List leaves;
- private int leafOrd = 0;
- private Spans current;
- private Map termContexts;
- private final int numLeaves;
-
- private MultiSpansWrapper(List leaves, SpanQuery query, Map termContexts) {
- this.query = query;
- this.leaves = leaves;
- this.numLeaves = leaves.size();
- this.termContexts = termContexts;
- }
-
- public static Spans wrap(IndexReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
+ public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException {
+ LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing
+ LeafReaderContext lrContext = lr.getContext();
+ Query rewrittenQuery = spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly
+ HashSet termSet = new HashSet<>();
+ rewrittenQuery.extractTerms(termSet);
Map termContexts = new HashMap<>();
- TreeSet terms = new TreeSet<>();
- query.extractTerms(terms);
- for (Term term : terms) {
- termContexts.put(term, TermContext.build(topLevelReaderContext, term));
+ for (Term term: termSet) {
+ TermContext termContext = TermContext.build(lrContext, term);
+ termContexts.put(term, termContext);
}
- final List leaves = topLevelReaderContext.leaves();
- if(leaves.size() == 1) {
- final LeafReaderContext ctx = leaves.get(0);
- return query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
- }
- return new MultiSpansWrapper(leaves, query, termContexts);
+ Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts);
+ return actSpans;
}
-
- @Override
- public boolean next() throws IOException {
- if (leafOrd >= numLeaves) {
- return false;
- }
- if (current == null) {
- final LeafReaderContext ctx = leaves.get(leafOrd);
- current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
- }
- while(true) {
- if (current.next()) {
- return true;
- }
- if (++leafOrd < numLeaves) {
- final LeafReaderContext ctx = leaves.get(leafOrd);
- current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
- } else {
- current = null;
- break;
- }
- }
- return false;
- }
-
- @Override
- public boolean skipTo(int target) throws IOException {
- if (leafOrd >= numLeaves) {
- return false;
- }
-
- int subIndex = ReaderUtil.subIndex(target, leaves);
- assert subIndex >= leafOrd;
- if (subIndex != leafOrd) {
- final LeafReaderContext ctx = leaves.get(subIndex);
- current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
- leafOrd = subIndex;
- } else if (current == null) {
- final LeafReaderContext ctx = leaves.get(leafOrd);
- current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
- }
- while (true) {
- if (target < leaves.get(leafOrd).docBase) {
- // target was in the previous slice
- if (current.next()) {
- return true;
- }
- } else if (current.skipTo(target - leaves.get(leafOrd).docBase)) {
- return true;
- }
- if (++leafOrd < numLeaves) {
- final LeafReaderContext ctx = leaves.get(leafOrd);
- current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
- } else {
- current = null;
- break;
- }
- }
-
- return false;
- }
-
- @Override
- public int doc() {
- if (current == null) {
- return DocIdSetIterator.NO_MORE_DOCS;
- }
- return current.doc() + leaves.get(leafOrd).docBase;
- }
-
- @Override
- public int start() {
- if (current == null) {
- return DocIdSetIterator.NO_MORE_DOCS;
- }
- return current.start();
- }
-
- @Override
- public int end() {
- if (current == null) {
- return DocIdSetIterator.NO_MORE_DOCS;
- }
- return current.end();
- }
-
- @Override
- public Collection getPayload() throws IOException {
- if (current == null) {
- return Collections.emptyList();
- }
- return current.getPayload();
- }
-
- @Override
- public boolean isPayloadAvailable() throws IOException {
- if (current == null) {
- return false;
- }
- return current.isPayloadAvailable();
- }
-
- @Override
- public long cost() {
- return Integer.MAX_VALUE; // just for tests
- }
-
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
index 068964a2289..f5a51e6b88c 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
@@ -651,47 +651,6 @@ public class TestBasics extends LuceneTestCase {
1746, 1747, 1756, 1757, 1766, 1767, 1776, 1777, 1786, 1787, 1796, 1797});
}
- @Test
- public void testSpansSkipTo() throws Exception {
- SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy"));
- SpanTermQuery t2 = new SpanTermQuery(new Term("field", "seventy"));
- Spans s1 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t1);
- Spans s2 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t2);
-
- assertTrue(s1.next());
- assertTrue(s2.next());
-
- boolean hasMore = true;
-
- do {
- hasMore = skipToAccordingToJavaDocs(s1, s1.doc() + 1);
- assertEquals(hasMore, s2.skipTo(s2.doc() + 1));
- assertEquals(s1.doc(), s2.doc());
- } while (hasMore);
- }
-
- /** Skips to the first match beyond the current, whose document number is
- * greater than or equal to target.