diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 663678d045b..a9805a96fd7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -32,6 +32,26 @@ API Changes * LUCENE-6067: Accountable.getChildResources has a default implementation returning the empty list. (Robert Muir) +======================= Lucene 5.2.0 ======================= + +New Features + +* LUCENE-6308: Span queries now share document conjunction/intersection + code with boolean queries, and use two-phased iterators for + faster intersection by avoiding loading positions in certain cases. + (Paul Elschot, Robert Muir via Mike McCandless) + +Optimizations + +* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if + one of the queries is MatchAllDocsQuery and just invokes the much + faster IndexWriter.deleteAll in that case (Robert Muir, Adrien + Grand, Mike McCandless) + +Bug Fixes + +* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause. + (Varun Thacker, Adrien Grand, Mike McCandless) ======================= Lucene 5.1.0 ======================= New Features diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 2893eea53ca..a64be82aef2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -32,8 +32,8 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Map.Entry; +import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -47,6 +47,7 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate; import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate; import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import 
org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -1315,6 +1316,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { */ public void deleteDocuments(Query... queries) throws IOException { ensureOpen(); + + // LUCENE-6379: Specialize MatchAllDocsQuery + for(Query query : queries) { + if (query.getClass() == MatchAllDocsQuery.class) { + deleteAll(); + return; + } + } + try { if (docWriter.deleteQueries(queries)) { processEvents(true, false); diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index 987abf955c3..53342b57870 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -23,8 +23,14 @@ import java.util.Comparator; import java.util.List; import org.apache.lucene.util.CollectionUtil; +import org.apache.lucene.search.spans.Spans; -class ConjunctionDISI extends DocIdSetIterator { +/** A conjunction of DocIdSetIterators. + * This iterates over the doc ids that are present in each given DocIdSetIterator. + *
Public only for use in {@link org.apache.lucene.search.spans}. + * @lucene.internal + */ +public class ConjunctionDISI extends DocIdSetIterator { /** Create a conjunction over the provided iterators, taking advantage of * {@link TwoPhaseIterator}. */ @@ -32,18 +38,16 @@ class ConjunctionDISI extends DocIdSetIterator { final List allIterators = new ArrayList<>(); final List twoPhaseIterators = new ArrayList<>(); for (DocIdSetIterator iterator : iterators) { - if (iterator instanceof Scorer) { - // if we have a scorer, check if it supports two-phase iteration - TwoPhaseIterator twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator(); - if (twoPhaseIterator != null) { - // Note: - allIterators.add(twoPhaseIterator.approximation()); - twoPhaseIterators.add(twoPhaseIterator); - } else { - allIterators.add(iterator); - } - } else { - // no approximation support, use the iterator as-is + TwoPhaseIterator twoPhaseIterator = null; + if (iterator instanceof Scorer) { + twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator(); + } else if (iterator instanceof Spans) { + twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator(); + } + if (twoPhaseIterator != null) { + allIterators.add(twoPhaseIterator.approximation()); + twoPhaseIterators.add(twoPhaseIterator); + } else { // no approximation support, use the iterator as-is allIterators.add(iterator); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 4089bc050e8..f13667ad401 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -30,7 +30,7 @@ import org.apache.lucene.util.ToStringUtils; * A query that matches all documents. 
* */ -public class MatchAllDocsQuery extends Query { +public final class MatchAllDocsQuery extends Query { private class MatchAllScorer extends Scorer { final float score; @@ -88,7 +88,7 @@ public class MatchAllDocsQuery extends Query { private float queryWeight; private float queryNorm; - public MatchAllDocsWeight(IndexSearcher searcher) { + public MatchAllDocsWeight() { super(MatchAllDocsQuery.this); } @@ -130,7 +130,7 @@ public class MatchAllDocsQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) { - return new MatchAllDocsWeight(searcher); + return new MatchAllDocsWeight(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index e46bb45e85e..c7007e17712 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -26,7 +26,6 @@ import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; @@ -71,7 +70,7 @@ public class PayloadNearQuery extends SpanNearQuery { } @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { return new PayloadNearSpanWeight(this, searcher); } @@ -113,7 +112,7 @@ public class PayloadNearQuery extends SpanNearQuery { @Override public int hashCode() { final int prime = 31; - int result = super.hashCode(); + int result = super.hashCode() ^ 
getClass().hashCode(); result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode()); result = prime * result + ((function == null) ? 0 : function.hashCode()); return result; @@ -149,8 +148,10 @@ public class PayloadNearQuery extends SpanNearQuery { @Override public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { - return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this, - similarity, similarity.simScorer(stats, context)); + Spans spans = query.getSpans(context, acceptDocs, termContexts); + return (spans == null) + ? null + : new PayloadNearSpanScorer(spans, this, similarity, similarity.simScorer(stats, context)); } @Override @@ -188,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery { protected float payloadScore; private int payloadsSeen; - protected PayloadNearSpanScorer(Spans spans, Weight weight, + protected PayloadNearSpanScorer(Spans spans, SpanWeight weight, Similarity similarity, Similarity.SimScorer docScorer) throws IOException { super(spans, weight, docScorer); this.spans = spans; @@ -200,13 +201,13 @@ public class PayloadNearQuery extends SpanNearQuery { if (subSpans[i] instanceof NearSpansOrdered) { if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) { processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(), - subSpans[i].start(), subSpans[i].end()); + subSpans[i].startPosition(), subSpans[i].endPosition()); } getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans()); } else if (subSpans[i] instanceof NearSpansUnordered) { if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) { processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(), - subSpans[i].start(), subSpans[i].end()); + subSpans[i].startPosition(), subSpans[i].endPosition()); } getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans()); } @@ -233,7 +234,7 @@ public class PayloadNearQuery extends SpanNearQuery { scratch.length = thePayload.length; payloadScore = 
function.currentScore(doc, fieldName, start, end, payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc, - spans.start(), spans.end(), scratch)); + spans.startPosition(), spans.endPosition(), scratch)); ++payloadsSeen; } } @@ -241,22 +242,20 @@ public class PayloadNearQuery extends SpanNearQuery { // @Override protected boolean setFreqCurrentDoc() throws IOException { - if (!more) { - return false; - } - doc = spans.doc(); - freq = 0.0f; - payloadScore = 0; - payloadsSeen = 0; - do { - int matchLength = spans.end() - spans.start(); - freq += docScorer.computeSlopFactor(matchLength); - Spans[] spansArr = new Spans[1]; - spansArr[0] = spans; - getPayloads(spansArr); - more = spans.next(); - } while (more && (doc == spans.doc())); - return true; + freq = 0.0f; + payloadScore = 0; + payloadsSeen = 0; + int startPos = spans.nextStartPosition(); + assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans; + do { + int matchLength = spans.endPosition() - startPos; + freq += docScorer.computeSlopFactor(matchLength); + Spans[] spansArr = new Spans[1]; + spansArr[0] = spans; + getPayloads(spansArr); + startPos = spans.nextStartPosition(); + } while (startPos != Spans.NO_MORE_POSITIONS); + return true; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java index 0329acce127..1596b35280f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java @@ -169,7 +169,7 @@ public class PayloadSpanUtil { final boolean inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, - inorder); + inorder); sp.setBoost(query.getBoost()); getPayloads(payloads, sp); } @@ -186,11 +186,15 @@ public class PayloadSpanUtil { } for (LeafReaderContext leafReaderContext : context.leaves()) { 
final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts); - while (spans.next() == true) { - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - for (byte [] bytes : payload) { - payloads.add(bytes); + if (spans != null) { + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + if (spans.isPayloadAvailable()) { + Collection payload = spans.getPayload(); + for (byte [] bytes : payload) { + payloads.add(bytes); + } + } } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index 463a6a0f806..977ed262c46 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads; */ import java.io.IOException; +import java.util.Objects; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; @@ -26,10 +27,10 @@ import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.spans.Spans; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanScorer; import org.apache.lucene.search.spans.SpanTermQuery; @@ -60,14 +61,14 @@ public class PayloadTermQuery extends SpanTermQuery { } public PayloadTermQuery(Term term, PayloadFunction function, - boolean includeSpanScore) { + boolean includeSpanScore) { super(term); - 
this.function = function; + this.function = Objects.requireNonNull(function); this.includeSpanScore = includeSpanScore; } @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { return new PayloadTermWeight(this, searcher); } @@ -79,9 +80,11 @@ public class PayloadTermQuery extends SpanTermQuery { } @Override - public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { - return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), - this, similarity.simScorer(stats, context)); + public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { + TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts); + return (spans == null) + ? null + : new PayloadTermSpanScorer(spans, this, similarity.simScorer(stats, context)); } protected class PayloadTermSpanScorer extends SpanScorer { @@ -90,45 +93,42 @@ public class PayloadTermQuery extends SpanTermQuery { protected int payloadsSeen; private final TermSpans termSpans; - public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException { + public PayloadTermSpanScorer(TermSpans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException { super(spans, weight, docScorer); - termSpans = spans; + termSpans = spans; // CHECKME: generics to use SpanScorer.spans as TermSpans. 
} @Override protected boolean setFreqCurrentDoc() throws IOException { - if (!more) { - return false; - } - doc = spans.doc(); freq = 0.0f; numMatches = 0; payloadScore = 0; payloadsSeen = 0; - while (more && doc == spans.doc()) { - int matchLength = spans.end() - spans.start(); + int startPos = spans.nextStartPosition(); + assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans; + do { + int matchLength = spans.endPosition() - startPos; freq += docScorer.computeSlopFactor(matchLength); numMatches++; processPayload(similarity); - more = spans.next();// this moves positions to the next match in this - // document - } - return more || (freq != 0); + startPos = spans.nextStartPosition(); + } while (startPos != Spans.NO_MORE_POSITIONS); + return freq != 0; } protected void processPayload(Similarity similarity) throws IOException { - if (termSpans.isPayloadAvailable()) { + if (spans.isPayloadAvailable()) { final PostingsEnum postings = termSpans.getPostings(); payload = postings.getPayload(); if (payload != null) { payloadScore = function.currentScore(doc, term.field(), - spans.start(), spans.end(), payloadsSeen, payloadScore, - docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload)); + spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, + docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload)); } else { payloadScore = function.currentScore(doc, term.field(), - spans.start(), spans.end(), payloadsSeen, payloadScore, 1F); + spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F); } payloadsSeen++; @@ -176,7 +176,7 @@ public class PayloadTermQuery extends SpanTermQuery { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs()); + PayloadTermSpanScorer scorer = scorer(context, 
context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { @@ -220,7 +220,7 @@ public class PayloadTermQuery extends SpanTermQuery { public int hashCode() { final int prime = 31; int result = super.hashCode(); - result = prime * result + ((function == null) ? 0 : function.hashCode()); + result = prime * result + function.hashCode(); result = prime * result + (includeSpanScore ? 1231 : 1237); return result; } @@ -234,14 +234,9 @@ public class PayloadTermQuery extends SpanTermQuery { if (getClass() != obj.getClass()) return false; PayloadTermQuery other = (PayloadTermQuery) obj; - if (function == null) { - if (other.function != null) - return false; - } else if (!function.equals(other.function)) - return false; if (includeSpanScore != other.includeSpanScore) return false; - return true; + return function.equals(other.function); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 9b740f6526b..465d3796cbe 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -106,7 +106,7 @@ public class FieldMaskingSpanQuery extends SpanQuery { } @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { return maskedQuery.createWeight(searcher, needsScores); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java index d26965100f1..d94a1218e3d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java @@ -19,10 +19,13 @@ package 
org.apache.lucene.search.spans; import java.io.IOException; import java.util.Collection; +import java.util.Objects; + +import org.apache.lucene.search.TwoPhaseIterator; /** - * A {@link Spans} implementation which allows wrapping another spans instance - * and override some selected methods. + * A {@link Spans} implementation wrapping another spans instance, + * allowing to override selected methods in a subclass. */ public class FilterSpans extends Spans { @@ -31,32 +34,37 @@ public class FilterSpans extends Spans { /** Wrap the given {@link Spans}. */ public FilterSpans(Spans in) { - this.in = in; + this.in = Objects.requireNonNull(in); } @Override - public boolean next() throws IOException { - return in.next(); + public int nextDoc() throws IOException { + return in.nextDoc(); } @Override - public boolean skipTo(int target) throws IOException { - return in.skipTo(target); + public int advance(int target) throws IOException { + return in.advance(target); } @Override - public int doc() { - return in.doc(); + public int docID() { + return in.docID(); } @Override - public int start() { - return in.start(); + public int nextStartPosition() throws IOException { + return in.nextStartPosition(); } @Override - public int end() { - return in.end(); + public int startPosition() { + return in.startPosition(); + } + + @Override + public int endPosition() { + return in.endPosition(); } @Override @@ -79,4 +87,8 @@ public class FilterSpans extends Spans { return "Filter(" + in.toString() + ")"; } + @Override + public TwoPhaseIterator asTwoPhaseIterator() { + return in.asTwoPhaseIterator(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java new file mode 100644 index 00000000000..e2251731992 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java @@ -0,0 +1,103 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.ConjunctionDISI; +import org.apache.lucene.search.TwoPhaseIterator; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +/** + * Common super class for un/ordered Spans + */ +abstract class NearSpans extends Spans { + SpanNearQuery query; + int allowedSlop; + + List subSpans; // in query order + DocIdSetIterator conjunction; // use to move to next doc with all clauses + boolean atFirstInCurrentDoc; + boolean oneExhaustedInCurrentDoc; // no more results possible in current doc + + NearSpans(SpanNearQuery query, List subSpans) + throws IOException { + this.query = Objects.requireNonNull(query); + this.allowedSlop = query.getSlop(); + if (subSpans.size() < 2) { + throw new IllegalArgumentException("Less than 2 subSpans: " + query); + } + this.subSpans = Objects.requireNonNull(subSpans); // in query order + this.conjunction = ConjunctionDISI.intersect(subSpans); + } + + @Override + public int docID() { + return conjunction.docID(); + } + + @Override + public long cost() { + return conjunction.cost(); + } + + @Override + public int nextDoc() throws IOException { + return (conjunction.nextDoc() == 
NO_MORE_DOCS) + ? NO_MORE_DOCS + : toMatchDoc(); + } + + @Override + public int advance(int target) throws IOException { + return (conjunction.advance(target) == NO_MORE_DOCS) + ? NO_MORE_DOCS + : toMatchDoc(); + } + + abstract int toMatchDoc() throws IOException; + + abstract boolean twoPhaseCurrentDocMatches() throws IOException; + + /** + * Return a {@link TwoPhaseIterator} view of this {@link NearSpans}. + */ + @Override + public TwoPhaseIterator asTwoPhaseIterator() { + TwoPhaseIterator res = new TwoPhaseIterator(conjunction) { + + @Override + public boolean matches() throws IOException { + return twoPhaseCurrentDocMatches(); + } + }; + return res; + } + + private Spans[] subSpansArray = null; // init only when needed. + + public Spans[] getSubSpans() { + if (subSpansArray == null) { + subSpansArray = subSpans.toArray(new Spans[subSpans.size()]); + } + return subSpansArray; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java index 508c9661ed2..a77651e8e62 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java @@ -17,24 +17,18 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.InPlaceMergeSorter; - import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Collection; -import java.util.Map; import java.util.Set; /** A Spans that is formed from the ordered subspans of a SpanNearQuery - * where the subspans do not overlap and have a maximum slop between them. 
+ * where the subspans do not overlap and have a maximum slop between them, + * and that does not need to collect payloads. + * To also collect payloads, see {@link NearSpansPayloadOrdered}. *

* The formed spans only contains minimum slop matches.
* The matching slop is computed from the distance(s) between @@ -55,306 +49,196 @@ import java.util.Set; * Expert: * Only public for subclassing. Most implementations should not need this class */ -public class NearSpansOrdered extends Spans { - private final int allowedSlop; - private boolean firstTime = true; - private boolean more = false; +public class NearSpansOrdered extends NearSpans { - /** The spans in the same order as the SpanNearQuery */ - private final Spans[] subSpans; + protected int matchDoc = -1; + protected int matchStart = -1; + protected int matchEnd = -1; - /** Indicates that all subSpans have same doc() */ - private boolean inSameDoc = false; - - private int matchDoc = -1; - private int matchStart = -1; - private int matchEnd = -1; - private List matchPayload; - - private final Spans[] subSpansByDoc; - // Even though the array is probably almost sorted, InPlaceMergeSorter will likely - // perform better since it has a lower overhead than TimSorter for small arrays - private final InPlaceMergeSorter sorter = new InPlaceMergeSorter() { - @Override - protected void swap(int i, int j) { - ArrayUtil.swap(subSpansByDoc, i, j); - } - @Override - protected int compare(int i, int j) { - return subSpansByDoc[i].doc() - subSpansByDoc[j].doc(); - } - }; - - private SpanNearQuery query; - private boolean collectPayloads = true; - - public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { - this(spanNearQuery, context, acceptDocs, termContexts, true); + public NearSpansOrdered(SpanNearQuery query, List subSpans) throws IOException { + super(query, subSpans); + this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1 } - public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map termContexts, boolean collectPayloads) - throws IOException { - if (spanNearQuery.getClauses().length < 2) { - throw new 
IllegalArgumentException("Less than 2 clauses: " - + spanNearQuery); - } - this.collectPayloads = collectPayloads; - allowedSlop = spanNearQuery.getSlop(); - SpanQuery[] clauses = spanNearQuery.getClauses(); - subSpans = new Spans[clauses.length]; - matchPayload = new LinkedList<>(); - subSpansByDoc = new Spans[clauses.length]; - for (int i = 0; i < clauses.length; i++) { - subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts); - subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() - } - query = spanNearQuery; // kept for toString() only. - } - - // inherit javadocs - @Override - public int doc() { return matchDoc; } - - // inherit javadocs - @Override - public int start() { return matchStart; } - - // inherit javadocs - @Override - public int end() { return matchEnd; } - - public Spans[] getSubSpans() { - return subSpans; - } - - // TODO: Remove warning after API has been finalized - // TODO: Would be nice to be able to lazy load payloads - @Override - public Collection getPayload() throws IOException { - return matchPayload; - } - - // TODO: Remove warning after API has been finalized - @Override - public boolean isPayloadAvailable() { - return matchPayload.isEmpty() == false; - } - - @Override - public long cost() { - long minCost = Long.MAX_VALUE; - for (int i = 0; i < subSpans.length; i++) { - minCost = Math.min(minCost, subSpans[i].cost()); - } - return minCost; - } - - // inherit javadocs - @Override - public boolean next() throws IOException { - if (firstTime) { - firstTime = false; - for (int i = 0; i < subSpans.length; i++) { - if (! subSpans[i].next()) { - more = false; - return false; - } - } - more = true; - } - if(collectPayloads) { - matchPayload.clear(); - } - return advanceAfterOrdered(); - } - - // inherit javadocs - @Override - public boolean skipTo(int target) throws IOException { - if (firstTime) { - firstTime = false; - for (int i = 0; i < subSpans.length; i++) { - if (! 
subSpans[i].skipTo(target)) { - more = false; - return false; - } - } - more = true; - } else if (more && (subSpans[0].doc() < target)) { - if (subSpans[0].skipTo(target)) { - inSameDoc = false; - } else { - more = false; - return false; - } - } - if(collectPayloads) { - matchPayload.clear(); - } - return advanceAfterOrdered(); - } - /** Advances the subSpans to just after an ordered match with a minimum slop * that is smaller than the slop allowed by the SpanNearQuery. * @return true iff there is such a match. */ - private boolean advanceAfterOrdered() throws IOException { - while (more && (inSameDoc || toSameDoc())) { - if (stretchToOrder() && shrinkToAfterShortestMatch()) { - return true; - } - } - return false; // no more matches - } - - - /** Advance the subSpans to the same document */ - private boolean toSameDoc() throws IOException { - sorter.sort(0, subSpansByDoc.length); - int firstIndex = 0; - int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc(); - while (subSpansByDoc[firstIndex].doc() != maxDoc) { - if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) { - more = false; - inSameDoc = false; - return false; - } - maxDoc = subSpansByDoc[firstIndex].doc(); - if (++firstIndex == subSpansByDoc.length) { - firstIndex = 0; - } - } - for (int i = 0; i < subSpansByDoc.length; i++) { - assert (subSpansByDoc[i].doc() == maxDoc) - : " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0] - + "\n at doc " + subSpansByDoc[i].doc() - + ", but should be at " + maxDoc; - } - inSameDoc = true; - return true; - } - - /** Check whether two Spans in the same document are ordered and not overlapping. 
- * @return false iff spans2's start position is smaller than spans1's end position - */ - static final boolean docSpansOrderedNonOverlap(Spans spans1, Spans spans2) { - assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc(); - assert spans1.start() < spans1.end(); - assert spans2.start() < spans2.end(); - return spans1.end() <= spans2.start(); - } - - /** Like {@link #docSpansOrderedNonOverlap(Spans,Spans)}, but use the spans - * starts and ends as parameters. - */ - private static final boolean docSpansOrderedNonOverlap(int start1, int end1, int start2, int end2) { - assert start1 < end1; - assert start2 < end2; - return end1 <= start2; - } - - /** Order the subSpans within the same document by advancing all later spans - * after the previous one. - */ - private boolean stretchToOrder() throws IOException { - matchDoc = subSpans[0].doc(); - for (int i = 1; inSameDoc && (i < subSpans.length); i++) { - while (! docSpansOrderedNonOverlap(subSpans[i-1], subSpans[i])) { - if (! subSpans[i].next()) { - inSameDoc = false; - more = false; - break; - } else if (matchDoc != subSpans[i].doc()) { - inSameDoc = false; - break; + @Override + int toMatchDoc() throws IOException { + subSpansToFirstStartPosition(); + while (true) { + if (! stretchToOrder()) { + if (conjunction.nextDoc() == NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + subSpansToFirstStartPosition(); + } else { + if (shrinkToAfterShortestMatch()) { + atFirstInCurrentDoc = true; + return conjunction.docID(); + } + // not a match, after shortest ordered spans, not at beginning of doc. + if (oneExhaustedInCurrentDoc) { + if (conjunction.nextDoc() == NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + subSpansToFirstStartPosition(); } } } - return inSameDoc; + } + + @Override + boolean twoPhaseCurrentDocMatches() throws IOException { + subSpansToFirstStartPosition(); + while (true) { + if (! 
stretchToOrder()) { + return false; + } + if (shrinkToAfterShortestMatch()) { + atFirstInCurrentDoc = true; + return true; + } + // not a match, after shortest ordered spans + if (oneExhaustedInCurrentDoc) { + return false; + } + } + } + + @Override + public int nextStartPosition() throws IOException { + if (atFirstInCurrentDoc) { + atFirstInCurrentDoc = false; + return matchStart; + } + while (true) { + if (oneExhaustedInCurrentDoc) { + matchStart = NO_MORE_POSITIONS; + matchEnd = NO_MORE_POSITIONS; + return NO_MORE_POSITIONS; + } + if (! stretchToOrder()) { + matchStart = NO_MORE_POSITIONS; + matchEnd = NO_MORE_POSITIONS; + return NO_MORE_POSITIONS; + } + if (shrinkToAfterShortestMatch()) { // may also leave oneExhaustedInCurrentDoc + return matchStart; + } + // after shortest ordered spans, or oneExhaustedInCurrentDoc + } + } + + private void subSpansToFirstStartPosition() throws IOException { + for (Spans spans : subSpans) { + assert spans.startPosition() == -1 : "spans="+spans; + spans.nextStartPosition(); + assert spans.startPosition() != NO_MORE_POSITIONS; + } + oneExhaustedInCurrentDoc = false; + } + + /** Order the subSpans within the same document by using nextStartPosition on all subSpans + * after the first as little as necessary. + * Return true when the subSpans could be ordered in this way, + * otherwise at least one is exhausted in the current doc. 
+ */ + private boolean stretchToOrder() throws IOException { + Spans prevSpans = subSpans.get(0); + assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans; + assert prevSpans.endPosition() != NO_MORE_POSITIONS; + for (int i = 1; i < subSpans.size(); i++) { + Spans spans = subSpans.get(i); + assert spans.startPosition() != NO_MORE_POSITIONS; + assert spans.endPosition() != NO_MORE_POSITIONS; + + while (prevSpans.endPosition() > spans.startPosition()) { // while overlapping spans + if (spans.nextStartPosition() == NO_MORE_POSITIONS) { + return false; + } + } + prevSpans = spans; + } + return true; // all subSpans ordered and non overlapping } /** The subSpans are ordered in the same doc, so there is a possible match. - * Compute the slop while making the match as short as possible by advancing - * all subSpans except the last one in reverse order. + * Compute the slop while making the match as short as possible by using nextStartPosition + * on all subSpans, except the last one, in reverse order. 
*/ - private boolean shrinkToAfterShortestMatch() throws IOException { - matchStart = subSpans[subSpans.length - 1].start(); - matchEnd = subSpans[subSpans.length - 1].end(); - Set possibleMatchPayloads = new HashSet<>(); - if (subSpans[subSpans.length - 1].isPayloadAvailable()) { - possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload()); - } + protected boolean shrinkToAfterShortestMatch() throws IOException { + Spans lastSubSpans = subSpans.get(subSpans.size() - 1); + matchStart = lastSubSpans.startPosition(); + matchEnd = lastSubSpans.endPosition(); - Collection possiblePayload = null; - int matchSlop = 0; int lastStart = matchStart; int lastEnd = matchEnd; - for (int i = subSpans.length - 2; i >= 0; i--) { - Spans prevSpans = subSpans[i]; - if (collectPayloads && prevSpans.isPayloadAvailable()) { - Collection payload = prevSpans.getPayload(); - possiblePayload = new ArrayList<>(payload.size()); - possiblePayload.addAll(payload); - } - - int prevStart = prevSpans.start(); - int prevEnd = prevSpans.end(); - while (true) { // Advance prevSpans until after (lastStart, lastEnd) - if (! prevSpans.next()) { - inSameDoc = false; - more = false; - break; // Check remaining subSpans for final match. - } else if (matchDoc != prevSpans.doc()) { - inSameDoc = false; // The last subSpans is not advanced here. - break; // Check remaining subSpans for last match in this document. - } else { - int ppStart = prevSpans.start(); - int ppEnd = prevSpans.end(); // Cannot avoid invoking .end() - if (! docSpansOrderedNonOverlap(ppStart, ppEnd, lastStart, lastEnd)) { - break; // Check remaining subSpans. 
- } else { // prevSpans still before (lastStart, lastEnd) - prevStart = ppStart; - prevEnd = ppEnd; - if (collectPayloads && prevSpans.isPayloadAvailable()) { - Collection payload = prevSpans.getPayload(); - possiblePayload = new ArrayList<>(payload.size()); - possiblePayload.addAll(payload); - } - } + for (int i = subSpans.size() - 2; i >= 0; i--) { + Spans prevSpans = subSpans.get(i); + + int prevStart = prevSpans.startPosition(); + int prevEnd = prevSpans.endPosition(); + while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd) + if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) { + oneExhaustedInCurrentDoc = true; + break; // Check remaining subSpans for match. } + int ppStart = prevSpans.startPosition(); + int ppEnd = prevSpans.endPosition(); + if (ppEnd > lastStart) { // if overlapping spans + break; // Check remaining subSpans. + } + // prevSpans still before (lastStart, lastEnd) + prevStart = ppStart; + prevEnd = ppEnd; } - if (collectPayloads && possiblePayload != null) { - possibleMatchPayloads.addAll(possiblePayload); - } - assert prevStart <= matchStart; if (matchStart > prevEnd) { // Only non overlapping spans add to slop. matchSlop += (matchStart - prevEnd); } /* Do not break on (matchSlop > allowedSlop) here to make sure - * that subSpans[0] is advanced after the match, if any. + * that on return the first subSpans has nextStartPosition called. */ matchStart = prevStart; lastStart = prevStart; lastEnd = prevEnd; } - + boolean match = matchSlop <= allowedSlop; - - if(collectPayloads && match && possibleMatchPayloads.size() > 0) { - matchPayload.addAll(possibleMatchPayloads); - } return match; // ordered and allowed slop } + @Override + public int startPosition() { + return atFirstInCurrentDoc ? -1 : matchStart; + } + + @Override + public int endPosition() { + return atFirstInCurrentDoc ? 
-1 : matchEnd; + } + + /** Throws an UnsupportedOperationException */ + @Override + public Collection getPayload() throws IOException { + throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead"); + } + + /** Throws an UnsupportedOperationException */ + @Override + public boolean isPayloadAvailable() { + throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead"); + } + @Override public String toString() { - return getClass().getName() + "("+query.toString()+")@"+ - (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END")); + return "NearSpansOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java new file mode 100644 index 00000000000..b2ea4e85679 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java @@ -0,0 +1,146 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Collection; +import java.util.Set; + +/** A {@link NearSpansOrdered} that allows collecting payloads. + * Expert: + * Only public for subclassing. Most implementations should not need this class + */ +public class NearSpansPayloadOrdered extends NearSpansOrdered { + + private List matchPayload; + private Set possibleMatchPayloads; + + public NearSpansPayloadOrdered(SpanNearQuery query, List subSpans) + throws IOException { + super(query, subSpans); + this.matchPayload = new LinkedList<>(); + this.possibleMatchPayloads = new HashSet<>(); + } + + /** The subSpans are ordered in the same doc, so there is a possible match. + * Compute the slop while making the match as short as possible by using nextStartPosition + * on all subSpans, except the last one, in reverse order. + * Also collect the payloads. + */ + protected boolean shrinkToAfterShortestMatch() throws IOException { + Spans lastSubSpans = subSpans.get(subSpans.size() - 1); + matchStart = lastSubSpans.startPosition(); + matchEnd = lastSubSpans.endPosition(); + + matchPayload.clear(); + possibleMatchPayloads.clear(); + + if (lastSubSpans.isPayloadAvailable()) { + possibleMatchPayloads.addAll(lastSubSpans.getPayload()); + } + + Collection possiblePayload = null; + + int matchSlop = 0; + int lastStart = matchStart; + int lastEnd = matchEnd; + for (int i = subSpans.size() - 2; i >= 0; i--) { + Spans prevSpans = subSpans.get(i); + + if (prevSpans.isPayloadAvailable()) { + Collection payload = prevSpans.getPayload(); + possiblePayload = new ArrayList<>(payload.size()); + possiblePayload.addAll(payload); + } + + int prevStart = prevSpans.startPosition(); + int prevEnd = prevSpans.endPosition(); + while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd) + if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) { + 
oneExhaustedInCurrentDoc = true; + break; // Check remaining subSpans for match. + } + int ppStart = prevSpans.startPosition(); + int ppEnd = prevSpans.endPosition(); + if (ppEnd > lastStart) { // if overlapping spans + break; // Check remaining subSpans. + } + // prevSpans still before (lastStart, lastEnd) + prevStart = ppStart; + prevEnd = ppEnd; + if (prevSpans.isPayloadAvailable()) { + Collection payload = prevSpans.getPayload(); + if (possiblePayload == null) { + possiblePayload = new ArrayList<>(payload.size()); + } else { + possiblePayload.clear(); + } + possiblePayload.addAll(payload); + } + } + + if (possiblePayload != null) { + possibleMatchPayloads.addAll(possiblePayload); + } + + assert prevStart <= matchStart; + if (matchStart > prevEnd) { // Only non overlapping spans add to slop. + matchSlop += (matchStart - prevEnd); + } + + /* Do not break on (matchSlop > allowedSlop) here to make sure + * that on return the first subSpans has nextStartPosition called. + */ + matchStart = prevStart; + lastStart = prevStart; + lastEnd = prevEnd; + } + + boolean match = matchSlop <= allowedSlop; + + if (match && possibleMatchPayloads.size() > 0) { + matchPayload.addAll(possibleMatchPayloads); + } + + return match; // ordered and allowed slop + } + + // TODO: Remove warning after API has been finalized + // TODO: Would be nice to be able to lazy load payloads + /** Return payloads when available. */ + @Override + public Collection getPayload() throws IOException { + return matchPayload; + } + + /** Indicates whether payloads are available */ + @Override + public boolean isPayloadAvailable() { + return ! 
matchPayload.isEmpty(); + } + + @Override + public String toString() { + return "NearSpansPayloadOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition(); + } +} + diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java index 168e52d2abf..814f6bdebaa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java @@ -17,253 +17,225 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.PriorityQueue; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.HashSet; /** * Similar to {@link NearSpansOrdered}, but for the unordered case. - * + * * Expert: * Only public for subclassing. 
Most implementations should not need this class */ -public class NearSpansUnordered extends Spans { - private SpanNearQuery query; +public class NearSpansUnordered extends NearSpans { - private List ordered = new ArrayList<>(); // spans in query order - private Spans[] subSpans; - private int slop; // from query + private List subSpanCells; // in query order - private SpansCell first; // linked list of spans - private SpansCell last; // sorted by doc only + private SpanPositionQueue spanPositionQueue; - private int totalLength; // sum of current lengths + public NearSpansUnordered(SpanNearQuery query, List subSpans) + throws IOException { + super(query, subSpans); - private CellQueue queue; // sorted queue of spans - private SpansCell max; // max element in queue + this.subSpanCells = new ArrayList<>(subSpans.size()); + for (Spans subSpan : subSpans) { // sub spans in query order + this.subSpanCells.add(new SpansCell(subSpan)); + } + spanPositionQueue = new SpanPositionQueue(subSpans.size()); + singleCellToPositionQueue(); // -1 startPosition/endPosition also at doc -1 + } - private boolean more = true; // true iff not done - private boolean firstTime = true; // true before first next() + private void singleCellToPositionQueue() { + maxEndPositionCell = subSpanCells.get(0); + assert maxEndPositionCell.docID() == -1; + assert maxEndPositionCell.startPosition() == -1; + spanPositionQueue.add(maxEndPositionCell); + } - private class CellQueue extends PriorityQueue { - public CellQueue(int size) { + private void subSpanCellsToPositionQueue() throws IOException { // used when all subSpanCells arrived at the same doc. 
+ spanPositionQueue.clear(); + for (SpansCell cell : subSpanCells) { + assert cell.startPosition() == -1; + cell.nextStartPosition(); + assert cell.startPosition() != NO_MORE_POSITIONS; + spanPositionQueue.add(cell); + } + } + + /** SpansCell wraps a sub Spans to maintain totalSpanLength and maxEndPositionCell */ + private int totalSpanLength; + private SpansCell maxEndPositionCell; + + private class SpansCell extends FilterSpans { + private int spanLength = -1; + + public SpansCell(Spans spans) { + super(spans); + } + + @Override + public int nextStartPosition() throws IOException { + int res = in.nextStartPosition(); + if (res != NO_MORE_POSITIONS) { + adjustLength(); + } + adjustMax(); // also after last end position in current doc. + return res; + } + + private void adjustLength() { + if (spanLength != -1) { + totalSpanLength -= spanLength; // subtract old, possibly from a previous doc + } + assert in.startPosition() != NO_MORE_POSITIONS; + spanLength = endPosition() - startPosition(); + assert spanLength >= 0; + totalSpanLength += spanLength; // add new + } + + private void adjustMax() { + assert docID() == maxEndPositionCell.docID(); + if (endPosition() > maxEndPositionCell.endPosition()) { + maxEndPositionCell = this; + } + } + + @Override + public String toString() { + return "NearSpansUnordered.SpansCell(" + in.toString() + ")"; + } + } + + + private static class SpanPositionQueue extends PriorityQueue { + public SpanPositionQueue(int size) { super(size); } - + @Override protected final boolean lessThan(SpansCell spans1, SpansCell spans2) { - if (spans1.doc() == spans2.doc()) { - return docSpansOrdered(spans1, spans2); - } else { - return spans1.doc() < spans2.doc(); - } + return positionsOrdered(spans1, spans2); } } - - /** Wraps a Spans, and can be used to form a linked list. 
*/ - private class SpansCell extends Spans { - private Spans spans; - private SpansCell next; - private int length = -1; - private int index; - - public SpansCell(Spans spans, int index) { - this.spans = spans; - this.index = index; - } - - @Override - public boolean next() throws IOException { - return adjust(spans.next()); - } - - @Override - public boolean skipTo(int target) throws IOException { - return adjust(spans.skipTo(target)); - } - - private boolean adjust(boolean condition) { - if (length != -1) { - totalLength -= length; // subtract old length - } - if (condition) { - length = end() - start(); - totalLength += length; // add new length - - if (max == null || doc() > max.doc() - || (doc() == max.doc()) && (end() > max.end())) { - max = this; - } - } - more = condition; - return condition; - } - - @Override - public int doc() { return spans.doc(); } - - @Override - public int start() { return spans.start(); } - - @Override - public int end() { return spans.end(); } - // TODO: Remove warning after API has been finalized - @Override - public Collection getPayload() throws IOException { - return new ArrayList<>(spans.getPayload()); - } - - // TODO: Remove warning after API has been finalized - @Override - public boolean isPayloadAvailable() throws IOException { - return spans.isPayloadAvailable(); - } - - @Override - public long cost() { - return spans.cost(); - } - - @Override - public String toString() { return spans.toString() + "#" + index; } - } - - - public NearSpansUnordered(SpanNearQuery query, LeafReaderContext context, Bits acceptDocs, Map termContexts) - throws IOException { - this.query = query; - this.slop = query.getSlop(); - - SpanQuery[] clauses = query.getClauses(); - queue = new CellQueue(clauses.length); - subSpans = new Spans[clauses.length]; - for (int i = 0; i < clauses.length; i++) { - SpansCell cell = - new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i); - ordered.add(cell); - subSpans[i] = cell.spans; - } - } - 
public Spans[] getSubSpans() { - return subSpans; - } - @Override - public boolean next() throws IOException { - if (firstTime) { - initList(true); - listToQueue(); // initialize queue - firstTime = false; - } else if (more) { - if (min().next()) { // trigger further scanning - queue.updateTop(); // maintain queue - } else { - more = false; - } - } - - while (more) { - - boolean queueStale = false; - - if (min().doc() != max.doc()) { // maintain list - queueToList(); - queueStale = true; - } - - // skip to doc w/ all clauses - - while (more && first.doc() < last.doc()) { - more = first.skipTo(last.doc()); // skip first upto last - firstToLast(); // and move it to the end - queueStale = true; - } - - if (!more) return false; - - // found doc w/ all clauses - - if (queueStale) { // maintain the queue - listToQueue(); - queueStale = false; - } - - if (atMatch()) { - return true; - } - - more = min().next(); - if (more) { - queue.updateTop(); // maintain queue - } - } - return false; // no more matches - } - - @Override - public boolean skipTo(int target) throws IOException { - if (firstTime) { // initialize - initList(false); - for (SpansCell cell = first; more && cell!=null; cell=cell.next) { - more = cell.skipTo(target); // skip all - } - if (more) { - listToQueue(); - } - firstTime = false; - } else { // normal case - while (more && min().doc() < target) { // skip as needed - if (min().skipTo(target)) { - queue.updateTop(); - } else { - more = false; - } - } - } - return more && (atMatch() || next()); - } - /** Check whether two Spans in the same document are ordered with possible overlap. * @return true iff spans1 starts before spans2 * or the spans start at the same position, * and spans1 ends before spans2. */ - static final boolean docSpansOrdered(Spans spans1, Spans spans2) { - assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc(); - int start1 = spans1.start(); - int start2 = spans2.start(); - return (start1 == start2) ? 
(spans1.end() < spans2.end()) : (start1 < start2); + static final boolean positionsOrdered(Spans spans1, Spans spans2) { + assert spans1.docID() == spans2.docID() : "doc1 " + spans1.docID() + " != doc2 " + spans2.docID(); + int start1 = spans1.startPosition(); + int start2 = spans2.startPosition(); + return (start1 == start2) ? (spans1.endPosition() < spans2.endPosition()) : (start1 < start2); } - private SpansCell min() { return queue.top(); } + private SpansCell minPositionCell() { + return spanPositionQueue.top(); + } + + private boolean atMatch() { + assert minPositionCell().docID() == maxEndPositionCell.docID(); + return (maxEndPositionCell.endPosition() - minPositionCell().startPosition() - totalSpanLength) <= allowedSlop; + } @Override - public int doc() { return min().doc(); } - @Override - public int start() { return min().start(); } - @Override - public int end() { return max.end(); } + int toMatchDoc() throws IOException { + // at doc with all subSpans + subSpanCellsToPositionQueue(); + while (true) { + if (atMatch()) { + atFirstInCurrentDoc = true; + oneExhaustedInCurrentDoc = false; + return conjunction.docID(); + } + assert minPositionCell().startPosition() != NO_MORE_POSITIONS; + if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) { + spanPositionQueue.updateTop(); + } + else { // exhausted a subSpan in current doc + if (conjunction.nextDoc() == NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + // at doc with all subSpans + subSpanCellsToPositionQueue(); + } + } + } + + @Override + boolean twoPhaseCurrentDocMatches() throws IOException { + // at doc with all subSpans + subSpanCellsToPositionQueue(); + while (true) { + if (atMatch()) { + atFirstInCurrentDoc = true; + oneExhaustedInCurrentDoc = false; + return true; + } + assert minPositionCell().startPosition() != NO_MORE_POSITIONS; + if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) { + spanPositionQueue.updateTop(); + } + else { // exhausted a subSpan in current doc + return 
false; + } + } + } + + @Override + public int nextStartPosition() throws IOException { + if (atFirstInCurrentDoc) { + atFirstInCurrentDoc = false; + return minPositionCell().startPosition(); + } + while (minPositionCell().startPosition() == -1) { // initially at current doc + minPositionCell().nextStartPosition(); + spanPositionQueue.updateTop(); + } + assert minPositionCell().startPosition() != NO_MORE_POSITIONS; + while (true) { + if (minPositionCell().nextStartPosition() == NO_MORE_POSITIONS) { + oneExhaustedInCurrentDoc = true; + return NO_MORE_POSITIONS; + } + spanPositionQueue.updateTop(); + if (atMatch()) { + return minPositionCell().startPosition(); + } + } + } + + @Override + public int startPosition() { + assert minPositionCell() != null; + return atFirstInCurrentDoc ? -1 + : oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS + : minPositionCell().startPosition(); + } + + @Override + public int endPosition() { + return atFirstInCurrentDoc ? -1 + : oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS + : maxEndPositionCell.endPosition(); + } + - // TODO: Remove warning after API has been finalized /** - * WARNING: The List is not necessarily in order of the the positions + * WARNING: The List is not necessarily in order of the positions. 
* @return Collection of byte[] payloads * @throws IOException if there is a low-level I/O error */ @Override public Collection getPayload() throws IOException { Set matchPayload = new HashSet<>(); - for (SpansCell cell = first; cell != null; cell = cell.next) { + for (SpansCell cell : subSpanCells) { if (cell.isPayloadAvailable()) { matchPayload.addAll(cell.getPayload()); } @@ -271,78 +243,23 @@ public class NearSpansUnordered extends Spans { return matchPayload; } - // TODO: Remove warning after API has been finalized @Override public boolean isPayloadAvailable() throws IOException { - SpansCell pointer = min(); - while (pointer != null) { - if (pointer.isPayloadAvailable()) { + for (SpansCell cell : subSpanCells) { + if (cell.isPayloadAvailable()) { return true; } - pointer = pointer.next; } - return false; } - - @Override - public long cost() { - long minCost = Long.MAX_VALUE; - for (int i = 0; i < subSpans.length; i++) { - minCost = Math.min(minCost, subSpans[i].cost()); - } - return minCost; - } @Override public String toString() { - return getClass().getName() + "("+query.toString()+")@"+ - (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END")); - } - - private void initList(boolean next) throws IOException { - for (int i = 0; more && i < ordered.size(); i++) { - SpansCell cell = ordered.get(i); - if (next) - more = cell.next(); // move to first entry - if (more) { - addToList(cell); // add to list - } + if (minPositionCell() != null) { + return getClass().getName() + "("+query.toString()+")@"+ + (docID()+":"+startPosition()+"-"+endPosition()); + } else { + return getClass().getName() + "("+query.toString()+")@ ?START?"; } } - - private void addToList(SpansCell cell) { - if (last != null) { // add next to end of list - last.next = cell; - } else - first = cell; - last = cell; - cell.next = null; - } - - private void firstToLast() { - last.next = first; // move first to end of list - last = first; - first = first.next; - last.next = null; - } - - 
private void queueToList() { - last = first = null; - while (queue.top() != null) { - addToList(queue.pop()); - } - } - - private void listToQueue() { - queue.clear(); // rebuild queue - for (SpansCell cell = first; cell != null; cell = cell.next) { - queue.add(cell); // add to queue from list - } - } - - private boolean atMatch() { - return (min().doc() == max.doc()) - && ((max.end() - min().start() - totalLength) <= slop); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java index 7bcaa2caafd..708b1af3b17 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java @@ -21,9 +21,9 @@ import org.apache.lucene.util.ToStringUtils; import java.io.IOException; -/** +/** * Matches spans near the beginning of a field. - *

+ *

* This class is a simple extension of {@link SpanPositionRangeQuery} in that it assumes the * start to be zero and only checks the end boundary. */ @@ -37,10 +37,10 @@ public class SpanFirstQuery extends SpanPositionRangeQuery { @Override protected AcceptStatus acceptPosition(Spans spans) throws IOException { - assert spans.start() != spans.end() : "start equals end: " + spans.start(); - if (spans.start() >= end) - return AcceptStatus.NO_AND_ADVANCE; - else if (spans.end() <= end) + assert spans.startPosition() != spans.endPosition() : "start equals end: " + spans.startPosition(); + if (spans.startPosition() >= end) + return AcceptStatus.NO_MORE_IN_CURRENT_DOC; + else if (spans.endPosition() <= end) return AcceptStatus.YES; else return AcceptStatus.NO; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java index aa69146366b..f299e5f8c43 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java @@ -105,7 +105,7 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery { @Override public int hashCode() { - int h = match.hashCode(); + int h = match.hashCode() ^ getClass().hashCode(); h ^= (h << 8) | (h >>> 25); // reversible //TODO: is this right? h ^= payloadToMatch.hashCode(); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 1e1d0831058..71b49014133 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -37,7 +37,8 @@ import org.apache.lucene.util.ToStringUtils; /** Matches spans which are near one another. 
One can specify slop, the * maximum number of intervening unmatched positions, as well as whether - * matches are required to be in-order. */ + * matches are required to be in-order. + */ public class SpanNearQuery extends SpanQuery implements Cloneable { protected List clauses; protected int slop; @@ -53,22 +54,19 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { * must be in the same order as in clauses and must be non-overlapping. *
When inOrder is false, the spans from each clause * need not be ordered and may overlap. - * @param clauses the clauses to find near each other + * @param clauses the clauses to find near each other, in the same field, at least 2. * @param slop The slop value * @param inOrder true if order is important */ public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) { - this(clauses, slop, inOrder, true); + this(clauses, slop, inOrder, true); } - - public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder, boolean collectPayloads) { - // copy clauses array into an ArrayList - this.clauses = new ArrayList<>(clauses.length); - for (int i = 0; i < clauses.length; i++) { - SpanQuery clause = clauses[i]; - if (field == null) { // check field - field = clause.getField(); + public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder, boolean collectPayloads) { + this.clauses = new ArrayList<>(clausesIn.length); + for (SpanQuery clause : clausesIn) { + if (this.field == null) { // check field + this.field = clause.getField(); } else if (clause.getField() != null && !clause.getField().equals(field)) { throw new IllegalArgumentException("Clauses must have same field."); } @@ -92,14 +90,13 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { @Override public String getField() { return field; } - + @Override public void extractTerms(Set terms) { for (final SpanQuery clause : clauses) { clause.extractTerms(terms); } - } - + } @Override public String toString(String field) { @@ -124,15 +121,21 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { @Override public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { - if (clauses.size() == 0) // optimize 0-clause case - return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts); + ArrayList subSpans = new ArrayList<>(clauses.size()); - if (clauses.size() == 1) // optimize 1-clause case - return 
clauses.get(0).getSpans(context, acceptDocs, termContexts); - - return inOrder - ? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads) - : (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts); + for (SpanQuery seq : clauses) { + Spans subSpan = seq.getSpans(context, acceptDocs, termContexts); + if (subSpan != null) { + subSpans.add(subSpan); + } else { + return null; // all required + } + } + + // all NearSpans require at least two subSpans + return (! inOrder) ? new NearSpansUnordered(this, subSpans) + : collectPayloads ? new NearSpansPayloadOrdered(this, subSpans) + : new NearSpansOrdered(this, subSpans); } @Override @@ -148,12 +151,12 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { } } if (clone != null) { - return clone; // some clauses rewrote + return clone; // some clauses rewrote } else { - return this; // no clauses rewrote + return this; // no clauses rewrote } } - + @Override public SpanNearQuery clone() { int sz = clauses.size(); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 88c439d589b..5e1c3e4a2ae 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -30,9 +30,11 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Map; import java.util.Set; +import java.util.Objects; -/** Removes matches which overlap with another SpanQuery or - * within a x tokens before or y tokens after another SpanQuery. */ +/** Removes matches which overlap with another SpanQuery or which are + * within x tokens before or y tokens after another SpanQuery. 
+ */ public class SpanNotQuery extends SpanQuery implements Cloneable { private SpanQuery include; private SpanQuery exclude; @@ -45,20 +47,20 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { this(include, exclude, 0, 0); } - + /** Construct a SpanNotQuery matching spans from include which - * have no overlap with spans from exclude within + * have no overlap with spans from exclude within * dist tokens of include. */ public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) { this(include, exclude, dist, dist); } - + /** Construct a SpanNotQuery matching spans from include which - * have no overlap with spans from exclude within + * have no overlap with spans from exclude within * pre tokens before or post tokens of include. */ public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) { - this.include = include; - this.exclude = exclude; + this.include = Objects.requireNonNull(include); + this.exclude = Objects.requireNonNull(exclude); this.pre = (pre >=0) ? pre : 0; this.post = (post >= 0) ? 
post : 0; @@ -96,81 +98,153 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { @Override public SpanNotQuery clone() { - SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(), - (SpanQuery) exclude.clone(), pre, post); + SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery) include.clone(), + (SpanQuery) exclude.clone(), pre, post); spanNotQuery.setBoost(getBoost()); - return spanNotQuery; + return spanNotQuery; } @Override public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { + Spans includeSpans = include.getSpans(context, acceptDocs, termContexts); + if (includeSpans == null) { + return null; + } + + Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts); + if (excludeSpans == null) { + return includeSpans; + } + return new Spans() { - private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts); - private boolean moreInclude = true; + private boolean moreInclude = true; + private int includeStart = -1; + private int includeEnd = -1; + private boolean atFirstInCurrentDoc = false; - private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts); - private boolean moreExclude = excludeSpans.next(); + private boolean moreExclude = excludeSpans.nextDoc() != NO_MORE_DOCS; + private int excludeStart = moreExclude ? excludeSpans.nextStartPosition() : NO_MORE_POSITIONS; - @Override - public boolean next() throws IOException { - if (moreInclude) // move to next include - moreInclude = includeSpans.next(); - while (moreInclude && moreExclude) { + @Override + public int nextDoc() throws IOException { + if (moreInclude) { + moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS; + if (moreInclude) { + atFirstInCurrentDoc = true; + includeStart = includeSpans.nextStartPosition(); + assert includeStart != NO_MORE_POSITIONS; + } + } + toNextIncluded(); + int res = moreInclude ? 
includeSpans.docID() : NO_MORE_DOCS; + return res; + } - if (includeSpans.doc() > excludeSpans.doc()) // skip exclude - moreExclude = excludeSpans.skipTo(includeSpans.doc()); - - while (moreExclude // while exclude is before - && includeSpans.doc() == excludeSpans.doc() - && excludeSpans.end() <= includeSpans.start() - pre) { - moreExclude = excludeSpans.next(); // increment exclude + private void toNextIncluded() throws IOException { + while (moreInclude && moreExclude) { + if (includeSpans.docID() > excludeSpans.docID()) { + moreExclude = excludeSpans.advance(includeSpans.docID()) != NO_MORE_DOCS; + if (moreExclude) { + excludeStart = -1; // only use exclude positions at same doc } - - if (!moreExclude // if no intersection - || includeSpans.doc() != excludeSpans.doc() - || includeSpans.end()+post <= excludeSpans.start()) - break; // we found a match - - moreInclude = includeSpans.next(); // intersected: keep scanning } - return moreInclude; - } - - @Override - public boolean skipTo(int target) throws IOException { - if (moreInclude) // skip include - moreInclude = includeSpans.skipTo(target); - - if (!moreInclude) - return false; - - if (moreExclude // skip exclude - && includeSpans.doc() > excludeSpans.doc()) - moreExclude = excludeSpans.skipTo(includeSpans.doc()); - - while (moreExclude // while exclude is before - && includeSpans.doc() == excludeSpans.doc() - && excludeSpans.end() <= includeSpans.start()-pre) { - moreExclude = excludeSpans.next(); // increment exclude + if (excludeForwardInCurrentDocAndAtMatch()) { + break; // at match. 
} - if (!moreExclude // if no intersection - || includeSpans.doc() != excludeSpans.doc() - || includeSpans.end()+post <= excludeSpans.start()) - return true; // we found a match + // else intersected: keep scanning, to next doc if needed + includeStart = includeSpans.nextStartPosition(); + if (includeStart == NO_MORE_POSITIONS) { + moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS; + if (moreInclude) { + atFirstInCurrentDoc = true; + includeStart = includeSpans.nextStartPosition(); + assert includeStart != NO_MORE_POSITIONS; + } + } + } + } - return next(); // scan to next match + private boolean excludeForwardInCurrentDocAndAtMatch() throws IOException { + assert moreInclude; + assert includeStart != NO_MORE_POSITIONS; + if (! moreExclude) { + return true; + } + if (includeSpans.docID() != excludeSpans.docID()) { + return true; + } + // at same doc + if (excludeStart == -1) { // init exclude start position if needed + excludeStart = excludeSpans.nextStartPosition(); + assert excludeStart != NO_MORE_POSITIONS; + } + while (excludeSpans.endPosition() <= includeStart - pre) { + // exclude end position is before a possible exclusion + excludeStart = excludeSpans.nextStartPosition(); + if (excludeStart == NO_MORE_POSITIONS) { + return true; // no more exclude at current doc. + } + } + // exclude end position far enough in current doc, check start position: + boolean res = includeSpans.endPosition() + post <= excludeStart; + return res; + } + + @Override + public int advance(int target) throws IOException { + if (moreInclude) { + assert target > includeSpans.docID() : "target="+target+", includeSpans.docID()="+includeSpans.docID(); + moreInclude = includeSpans.advance(target) != NO_MORE_DOCS; + if (moreInclude) { + atFirstInCurrentDoc = true; + includeStart = includeSpans.nextStartPosition(); + assert includeStart != NO_MORE_POSITIONS; + } + } + toNextIncluded(); + int res = moreInclude ? 
includeSpans.docID() : NO_MORE_DOCS; + return res; + } + + @Override + public int docID() { + int res = includeSpans.docID(); + return res; + } + + @Override + public int nextStartPosition() throws IOException { + assert moreInclude; + + if (atFirstInCurrentDoc) { + atFirstInCurrentDoc = false; + assert includeStart != NO_MORE_POSITIONS; + return includeStart; } - @Override - public int doc() { return includeSpans.doc(); } - @Override - public int start() { return includeSpans.start(); } - @Override - public int end() { return includeSpans.end(); } + includeStart = includeSpans.nextStartPosition(); + while ((includeStart != NO_MORE_POSITIONS) + && (! excludeForwardInCurrentDocAndAtMatch())) + { + includeStart = includeSpans.nextStartPosition(); + } + + return includeStart; + } + + @Override + public int startPosition() { + assert includeStart == includeSpans.startPosition(); + return atFirstInCurrentDoc ? -1 : includeStart; + } + + @Override + public int endPosition() { + return atFirstInCurrentDoc ? 
-1 : includeSpans.endPosition(); + } - // TODO: Remove warning after API has been finalized @Override public Collection getPayload() throws IOException { ArrayList result = null; @@ -180,7 +254,6 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { return result; } - // TODO: Remove warning after API has been finalized @Override public boolean isPayloadAvailable() throws IOException { return includeSpans.isPayloadAvailable(); @@ -193,10 +266,9 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { @Override public String toString() { - return "spans(" + SpanNotQuery.this.toString() + ")"; - } - - }; + return "spans(" + SpanNotQuery.this.toString() + ")"; + } + }; } @Override @@ -230,7 +302,7 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { SpanNotQuery other = (SpanNotQuery)o; return this.include.equals(other.include) && this.exclude.equals(other.exclude) - && this.pre == other.pre + && this.pre == other.pre && this.post == other.post; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 2b617e49ebc..71215d063cb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -35,18 +35,19 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.search.Query; -/** Matches the union of its clauses.*/ +/** Matches the union of its clauses. + */ public class SpanOrQuery extends SpanQuery implements Cloneable { private List clauses; private String field; - /** Construct a SpanOrQuery merging the provided clauses. */ + /** Construct a SpanOrQuery merging the provided clauses. + * All clauses must have the same field. + */ public SpanOrQuery(SpanQuery... 
clauses) { - - // copy clauses array into an ArrayList this.clauses = new ArrayList<>(clauses.length); - for (int i = 0; i < clauses.length; i++) { - addClause(clauses[i]); + for (SpanQuery seq : clauses) { + addClause(seq); } } @@ -59,7 +60,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { } this.clauses.add(clause); } - + /** Return the clauses whose spans are matched. */ public SpanQuery[] getClauses() { return clauses.toArray(new SpanQuery[clauses.size()]); @@ -74,7 +75,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { clause.extractTerms(terms); } } - + @Override public SpanOrQuery clone() { int sz = clauses.size(); @@ -152,90 +153,120 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { @Override protected final boolean lessThan(Spans spans1, Spans spans2) { - if (spans1.doc() == spans2.doc()) { - if (spans1.start() == spans2.start()) { - return spans1.end() < spans2.end(); + if (spans1.docID() == spans2.docID()) { + if (spans1.startPosition() == spans2.startPosition()) { + return spans1.endPosition() < spans2.endPosition(); } else { - return spans1.start() < spans2.start(); + return spans1.startPosition() < spans2.startPosition(); } } else { - return spans1.doc() < spans2.doc(); + return spans1.docID() < spans2.docID(); } } } @Override - public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { - if (clauses.size() == 1) // optimize 1-clause case - return (clauses.get(0)).getSpans(context, acceptDocs, termContexts); + public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map termContexts) + throws IOException { + + ArrayList subSpans = new ArrayList<>(clauses.size()); + + for (SpanQuery seq : clauses) { + Spans subSpan = seq.getSpans(context, acceptDocs, termContexts); + if (subSpan != null) { + subSpans.add(subSpan); + } + } + + if (subSpans.size() == 0) { + return null; + } else if (subSpans.size() == 1) { 
+ return subSpans.get(0); + } + + SpanQueue queue = new SpanQueue(clauses.size()); + for (Spans spans : subSpans) { + queue.add(spans); + } return new Spans() { - private SpanQueue queue = null; - private long cost; - private boolean initSpanQueue(int target) throws IOException { - queue = new SpanQueue(clauses.size()); - Iterator i = clauses.iterator(); - while (i.hasNext()) { - Spans spans = i.next().getSpans(context, acceptDocs, termContexts); - cost += spans.cost(); - if ( ((target == -1) && spans.next()) - || ((target != -1) && spans.skipTo(target))) { - queue.add(spans); - } - } - return queue.size() != 0; + @Override + public int nextDoc() throws IOException { + if (queue.size() == 0) { // all done + return NO_MORE_DOCS; } - @Override - public boolean next() throws IOException { - if (queue == null) { - return initSpanQueue(-1); - } + int currentDoc = top().docID(); - if (queue.size() == 0) { // all done - return false; - } + if (currentDoc == -1) { // initially + return advance(0); + } - if (top().next()) { // move to next + do { + if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc queue.updateTop(); - return true; - } - - queue.pop(); // exhausted a clause - return queue.size() != 0; - } - - private Spans top() { return queue.top(); } - - @Override - public boolean skipTo(int target) throws IOException { - if (queue == null) { - return initSpanQueue(target); - } - - boolean skipCalled = false; - while (queue.size() != 0 && top().doc() < target) { - if (top().skipTo(target)) { - queue.updateTop(); - } else { - queue.pop(); + } else { + queue.pop(); // exhausted a clause + if (queue.size() == 0) { + return NO_MORE_DOCS; } - skipCalled = true; } - - if (skipCalled) { - return queue.size() != 0; + // assert queue.size() > 0; + int doc = top().docID(); + if (doc > currentDoc) { + return doc; + } + } while (true); + } + + private Spans top() { + return queue.top(); + } + + @Override + public int advance(int target) throws IOException { + + while 
((queue.size() > 0) && (top().docID() < target)) { + if (top().advance(target) != NO_MORE_DOCS) { + queue.updateTop(); + } else { + queue.pop(); } - return next(); } - @Override - public int doc() { return top().doc(); } - @Override - public int start() { return top().start(); } - @Override - public int end() { return top().end(); } + return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS; + } + + @Override + public int docID() { + return (queue == null) ? -1 + : (queue.size() > 0) ? top().docID() + : NO_MORE_DOCS; + } + + @Override + public int nextStartPosition() throws IOException { + top().nextStartPosition(); + queue.updateTop(); + int startPos = top().startPosition(); + while (startPos == -1) { // initially at this doc + top().nextStartPosition(); + queue.updateTop(); + startPos = top().startPosition(); + } + return startPos; + } + + @Override + public int startPosition() { + return top().startPosition(); + } + + @Override + public int endPosition() { + return top().endPosition(); + } @Override public Collection getPayload() throws IOException { @@ -257,15 +288,23 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { public String toString() { return "spans("+SpanOrQuery.this+")@"+ ((queue == null)?"START" - :(queue.size()>0?(doc()+":"+start()+"-"+end()):"END")); - } + :(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END")); + } + + private long cost = -1; @Override public long cost() { + if (cost == -1) { + cost = 0; + for (Spans spans : subSpans) { + cost += spans.cost(); + } + } return cost; } - - }; + + }; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java index dda6009e420..5edfef285db 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java @@ -28,15 +28,14 @@ 
import java.util.Iterator; * Only return those matches that have a specific payload at * the given position. *

- * Do not use this with an SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}. Instead, use - * {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads + * Do not use this with a SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}. + * Instead, use {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads * aren't ordered by {@link org.apache.lucene.search.spans.SpanNearQuery}. */ -public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{ +public class SpanPayloadCheckQuery extends SpanPositionCheckQuery { protected final Collection payloadToMatch; /** - * * @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check * @param payloadToMatch The {@link java.util.Collection} of payloads to match */ @@ -71,7 +70,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{ } } return AcceptStatus.YES; - } + } @Override public String toString(String field) { @@ -108,7 +107,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{ @Override public int hashCode() { - int h = match.hashCode(); + int h = match.hashCode() ^ getClass().hashCode(); h ^= (h << 8) | (h >>> 25); // reversible //TODO: is this right? 
h ^= payloadToMatch.hashCode(); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index a41442d0699..2df1e5e3ff3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -25,10 +25,9 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; import java.util.Map; import java.util.Set; +import java.util.Objects; /** @@ -37,9 +36,8 @@ import java.util.Set; public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable { protected SpanQuery match; - public SpanPositionCheckQuery(SpanQuery match) { - this.match = match; + this.match = Objects.requireNonNull(match); } /** @@ -60,42 +58,44 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea match.extractTerms(terms); } - /** + /** * Return value for {@link SpanPositionCheckQuery#acceptPosition(Spans)}. */ protected static enum AcceptStatus { /** Indicates the match should be accepted */ YES, - + /** Indicates the match should be rejected */ NO, - - /** - * Indicates the match should be rejected, and the enumeration should advance - * to the next document. + + /** + * Indicates the match should be rejected, and the enumeration may continue + * with the next document. */ - NO_AND_ADVANCE + NO_MORE_IN_CURRENT_DOC }; - + /** * Implementing classes are required to return whether the current position is a match for the passed in - * "match" {@link org.apache.lucene.search.spans.SpanQuery}. + * "match" {@link SpanQuery}. 
* - * This is only called if the underlying {@link org.apache.lucene.search.spans.Spans#next()} for the - * match is successful + * This is only called if the underlying last {@link Spans#nextStartPosition()} for the + * match indicated a valid start position. * * - * @param spans The {@link org.apache.lucene.search.spans.Spans} instance, positioned at the spot to check + * @param spans The {@link Spans} instance, positioned at the spot to check + * * @return whether the match is accepted, rejected, or rejected and should move to the next doc. * - * @see org.apache.lucene.search.spans.Spans#next() + * @see Spans#nextDoc() * */ protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; @Override public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { - return new PositionCheckSpan(context, acceptDocs, termContexts); + Spans matchSpans = match.getSpans(context, acceptDocs, termContexts); + return (matchSpans == null) ? 
null : new PositionCheckSpans(matchSpans); } @@ -116,79 +116,110 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea } } - protected class PositionCheckSpan extends Spans { - private Spans spans; + protected class PositionCheckSpans extends FilterSpans { - public PositionCheckSpan(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { - spans = match.getSpans(context, acceptDocs, termContexts); + private boolean atFirstInCurrentDoc = false; + private int startPos = -1; + + public PositionCheckSpans(Spans matchSpans) throws IOException { + super(matchSpans); } @Override - public boolean next() throws IOException { - if (!spans.next()) - return false; - - return doNext(); + public int nextDoc() throws IOException { + if (in.nextDoc() == NO_MORE_DOCS) + return NO_MORE_DOCS; + + return toNextDocWithAllowedPosition(); } @Override - public boolean skipTo(int target) throws IOException { - if (!spans.skipTo(target)) - return false; + public int advance(int target) throws IOException { + if (in.advance(target) == NO_MORE_DOCS) + return NO_MORE_DOCS; - return doNext(); + return toNextDocWithAllowedPosition(); } - - protected boolean doNext() throws IOException { + + @SuppressWarnings("fallthrough") + protected int toNextDocWithAllowedPosition() throws IOException { + startPos = in.nextStartPosition(); + assert startPos != NO_MORE_POSITIONS; for (;;) { switch(acceptPosition(this)) { - case YES: return true; - case NO: - if (!spans.next()) - return false; - break; - case NO_AND_ADVANCE: - if (!spans.skipTo(spans.doc()+1)) - return false; + case YES: + atFirstInCurrentDoc = true; + return in.docID(); + case NO: + startPos = in.nextStartPosition(); + if (startPos != NO_MORE_POSITIONS) { + break; + } + // else fallthrough + case NO_MORE_IN_CURRENT_DOC: + if (in.nextDoc() == NO_MORE_DOCS) { + startPos = -1; + return NO_MORE_DOCS; + } + startPos = in.nextStartPosition(); + assert startPos != NO_MORE_POSITIONS : "no start 
position at doc="+in.docID(); break; } } } @Override - public int doc() { return spans.doc(); } - - @Override - public int start() { return spans.start(); } - - @Override - public int end() { return spans.end(); } - // TODO: Remove warning after API has been finalized - - @Override - public Collection getPayload() throws IOException { - ArrayList result = null; - if (spans.isPayloadAvailable()) { - result = new ArrayList<>(spans.getPayload()); + public int nextStartPosition() throws IOException { + if (atFirstInCurrentDoc) { + atFirstInCurrentDoc = false; + return startPos; } - return result;//TODO: any way to avoid the new construction? - } - // TODO: Remove warning after API has been finalized - @Override - public boolean isPayloadAvailable() throws IOException { - return spans.isPayloadAvailable(); + for (;;) { + startPos = in.nextStartPosition(); + if (startPos == NO_MORE_POSITIONS) { + return NO_MORE_POSITIONS; + } + switch(acceptPosition(this)) { + case YES: + return startPos; + case NO: + break; + case NO_MORE_IN_CURRENT_DOC: + return startPos = NO_MORE_POSITIONS; // startPos ahead for the current doc. + } + } } @Override - public long cost() { - return spans.cost(); + public int startPosition() { + return atFirstInCurrentDoc ? -1 : startPos; + } + + @Override + public int endPosition() { + return atFirstInCurrentDoc ? -1 + : (startPos != NO_MORE_POSITIONS) ? in.endPosition() : NO_MORE_POSITIONS; } @Override public String toString() { - return "spans(" + SpanPositionCheckQuery.this.toString() + ")"; - } + return "spans(" + SpanPositionCheckQuery.this.toString() + ")"; + } + } + /** Returns true iff o is equal to this. 
*/ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null) return false; + if (getClass() != o.getClass()) return false; + final SpanPositionCheckQuery spcq = (SpanPositionCheckQuery) o; + return match.equals(spcq.match); + } + + @Override + public int hashCode() { + return match.hashCode() ^ getClass().hashCode(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java index f588d281058..3da4e1ae222 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java @@ -25,10 +25,10 @@ import java.io.IOException; /** * Checks to see if the {@link #getMatch()} lies between a start and end position * - * @see org.apache.lucene.search.spans.SpanFirstQuery for a derivation that is optimized for the case where start position is 0 + * See {@link SpanFirstQuery} for a derivation that is optimized for the case where start position is 0. */ public class SpanPositionRangeQuery extends SpanPositionCheckQuery { - protected int start = 0; + protected int start; protected int end; public SpanPositionRangeQuery(SpanQuery match, int start, int end) { @@ -40,13 +40,12 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery { @Override protected AcceptStatus acceptPosition(Spans spans) throws IOException { - assert spans.start() != spans.end(); - if (spans.start() >= end) - return AcceptStatus.NO_AND_ADVANCE; - else if (spans.start() >= start && spans.end() <= end) - return AcceptStatus.YES; - else - return AcceptStatus.NO; + assert spans.startPosition() != spans.endPosition(); + AcceptStatus res = (spans.startPosition() >= end) + ? AcceptStatus.NO_MORE_IN_CURRENT_DOC + : (spans.startPosition() >= start && spans.endPosition() <= end) + ? 
AcceptStatus.YES : AcceptStatus.NO; + return res; } @@ -96,7 +95,7 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery { @Override public int hashCode() { - int h = match.hashCode(); + int h = match.hashCode() ^ getClass().hashCode(); h ^= (h << 8) | (h >>> 25); // reversible h ^= Float.floatToRawIntBits(getBoost()) ^ end ^ start; return h; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index 00bed758488..7c2687aa8e5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -25,16 +25,17 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; /** Base class for span-based queries. */ public abstract class SpanQuery extends Query { - /** Expert: Returns the matches for this query in an index. Used internally - * to search for spans. */ + /** Expert: Returns the matches for this query in an index. + * Used internally to search for spans. + * This may return null to indicate that the SpanQuery has no results. + */ public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException; - /** + /** * Returns the name of the field matched by this query. *

* Note that this may return null if the query matches no terms. @@ -42,7 +43,7 @@ public abstract class SpanQuery extends Query { public abstract String getField(); @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { return new SpanWeight(this, searcher); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 56b25713aa6..3c9a90eb7ac 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -18,9 +18,9 @@ package org.apache.lucene.search.spans; */ import java.io.IOException; +import java.util.Objects; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.Similarity; /** @@ -29,58 +29,68 @@ import org.apache.lucene.search.similarities.Similarity; public class SpanScorer extends Scorer { protected Spans spans; - protected boolean more = true; - protected int doc; protected float freq; protected int numMatches; protected final Similarity.SimScorer docScorer; - - protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer) + + protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException { super(weight); - this.docScorer = docScorer; - this.spans = spans; - - doc = -1; - more = spans.next(); + this.docScorer = Objects.requireNonNull(docScorer); + this.spans = Objects.requireNonNull(spans); + this.doc = -1; } @Override public int nextDoc() throws IOException { - if (!setFreqCurrentDoc()) { - doc = NO_MORE_DOCS; + int prevDoc = doc; + doc = spans.nextDoc(); + if (doc != NO_MORE_DOCS) { + setFreqCurrentDoc(); } return doc; } @Override public int advance(int target) throws IOException { - if 
(!more) { - return doc = NO_MORE_DOCS; - } - if (spans.doc() < target) { // setFreqCurrentDoc() leaves spans.doc() ahead - more = spans.skipTo(target); - } - if (!setFreqCurrentDoc()) { - doc = NO_MORE_DOCS; + int prevDoc = doc; + doc = spans.advance(target); + if (doc != NO_MORE_DOCS) { + setFreqCurrentDoc(); } return doc; } - + protected boolean setFreqCurrentDoc() throws IOException { - if (!more) { - return false; - } - doc = spans.doc(); freq = 0.0f; numMatches = 0; + + assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans; + assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans; + int prevStartPos = -1; + int prevEndPos = -1; + + int startPos = spans.nextStartPosition(); + assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans; do { - int matchLength = spans.end() - spans.start(); - freq += docScorer.computeSlopFactor(matchLength); + assert startPos >= prevStartPos; + int endPos = spans.endPosition(); + assert endPos != Spans.NO_MORE_POSITIONS; + // This assertion can fail for Or spans on the same term: + // assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos; + assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos; numMatches++; - more = spans.next(); - } while (more && (doc == spans.doc())); + int matchLength = endPos - startPos; + freq += docScorer.computeSlopFactor(matchLength); + prevStartPos = startPos; + prevEndPos = endPos; + startPos = spans.nextStartPosition(); + } while (startPos != Spans.NO_MORE_POSITIONS); + + assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans; + assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans; + return true; } @@ -89,15 +99,16 @@ public class SpanScorer extends Scorer { @Override public float score() throws IOException { - return docScorer.score(doc, 
freq); + float s = docScorer.score(doc, freq); + return s; } - + @Override public int freq() throws IOException { return numMatches; } - /** Returns the intermediate "sloppy freq" adjusted for edit distance + /** Returns the intermediate "sloppy freq" adjusted for edit distance * @lucene.internal */ // only public so .payloads can see it. public float sloppyFreq() throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 2dc79ff6377..caa3963cc41 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -20,6 +20,7 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.util.Map; import java.util.Set; +import java.util.Objects; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.LeafReaderContext; @@ -31,19 +32,23 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; -/** Matches spans containing a term. */ +/** Matches spans containing a term. + * This should not be used for terms that are indexed at position Integer.MAX_VALUE. + */ public class SpanTermQuery extends SpanQuery { protected Term term; /** Construct a SpanTermQuery matching the named term's spans. */ - public SpanTermQuery(Term term) { this.term = term; } + public SpanTermQuery(Term term) { + this.term = Objects.requireNonNull(term); + } /** Return the term whose spans are matched. */ public Term getTerm() { return term; } @Override public String getField() { return term.field(); } - + @Override public void extractTerms(Set terms) { terms.add(term); @@ -64,7 +69,7 @@ public class SpanTermQuery extends SpanQuery { public int hashCode() { final int prime = 31; int result = super.hashCode(); - result = prime * result + ((term == null) ? 
0 : term.hashCode()); + result = prime * result + term.hashCode(); return result; } @@ -77,12 +82,7 @@ public class SpanTermQuery extends SpanQuery { if (getClass() != obj.getClass()) return false; SpanTermQuery other = (SpanTermQuery) obj; - if (term == null) { - if (other.term != null) - return false; - } else if (!term.equals(other.term)) - return false; - return true; + return term.equals(other.term); } @Override @@ -99,7 +99,7 @@ public class SpanTermQuery extends SpanQuery { } final TermsEnum termsEnum = terms.iterator(null); - if (termsEnum.seekExact(term.bytes())) { + if (termsEnum.seekExact(term.bytes())) { state = termsEnum.termState(); } else { state = null; @@ -110,14 +110,14 @@ public class SpanTermQuery extends SpanQuery { } else { state = termContext.get(context.ord); } - + if (state == null) { // term is not present in that reader - return TermSpans.EMPTY_TERM_SPANS; + return null; } - + final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null); termsEnum.seekExact(term.bytes(), state); - + final PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.PAYLOADS); return new TermSpans(postings, term); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index c172243e86c..b7f1b288a51 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -51,7 +51,7 @@ public class SpanWeight extends Weight { super(query); this.similarity = searcher.getSimilarity(); this.query = query; - + termContexts = new HashMap<>(); TreeSet terms = new TreeSet<>(); query.extractTerms(terms); @@ -66,8 +66,8 @@ public class SpanWeight extends Weight { } final String field = query.getField(); if (field != null) { - stats = similarity.computeWeight(query.getBoost(), - searcher.collectionStatistics(query.getField()), + stats = 
similarity.computeWeight(query.getBoost(), + searcher.collectionStatistics(query.getField()), termStats); } } @@ -88,9 +88,9 @@ public class SpanWeight extends Weight { public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { if (stats == null) { return null; - } else { - return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context)); } + Spans spans = query.getSpans(context, acceptDocs, termContexts); + return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context)); } @Override @@ -106,11 +106,11 @@ public class SpanWeight extends Weight { Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); result.addDetail(scoreExplanation); result.setValue(scoreExplanation.getValue()); - result.setMatch(true); + result.setMatch(true); return result; } } - + return new ComplexExplanation(false, 0.0f, "no matching term"); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java index 32aff3b2879..ea8bf8a5583 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java @@ -20,54 +20,44 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.util.Collection; -/** Expert: an enumeration of span matches. Used to implement span searching. - * Each span represents a range of term positions within a document. Matches - * are enumerated in order, by increasing document number, within that by - * increasing start position and finally by increasing end position. */ -public abstract class Spans { - /** Move to the next match, returning true iff any such exists. 
*/ - public abstract boolean next() throws IOException; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TwoPhaseIterator; - /** Skips to the first match beyond the current, whose document number is - * greater than or equal to target. - *

The behavior of this method is undefined when called with - * target ≤ current, or after the iterator has exhausted. - * Both cases may result in unpredicted behavior. - *

Returns true iff there is such - * a match.

Behaves as if written: - *

-   *   boolean skipTo(int target) {
-   *     do {
-   *       if (!next())
-   *         return false;
-   *     } while (target > doc());
-   *     return true;
-   *   }
-   * 
- * Most implementations are considerably more efficient than that. - */ - public abstract boolean skipTo(int target) throws IOException; +/** Iterates through combinations of start/end positions per-doc. + * Each start/end position represents a range of term positions within the current document. + * These are enumerated in order, by increasing document number, within that by + * increasing start position and finally by increasing end position. + */ +public abstract class Spans extends DocIdSetIterator { + public static final int NO_MORE_POSITIONS = Integer.MAX_VALUE; - /** Returns the document number of the current match. Initially invalid. */ - public abstract int doc(); - - /** Returns the start position of the current match. Initially invalid. */ - public abstract int start(); - - /** Returns the end position of the current match. Initially invalid. */ - public abstract int end(); - /** - * Returns the payload data for the current span. - * This is invalid until {@link #next()} is called for - * the first time. + * Returns the next start position for the current doc. + * There is always at least one start/end position per doc. + * After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}. + */ + public abstract int nextStartPosition() throws IOException; + + /** + * Returns the start position in the current doc, or -1 when {@link #nextStartPosition} was not yet called on the current doc. + * After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}. + */ + public abstract int startPosition(); + + /** + * Returns the end position for the current start position, or -1 when {@link #nextStartPosition} was not yet called on the current doc. + * After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}. + */ + public abstract int endPosition(); + + /** + * Returns the payload data for the current start/end position. 
+ * This is only valid after {@link #nextStartPosition()} + * returned an available start position. * This method must not be called more than once after each call - * of {@link #next()}. However, most payloads are loaded lazily, + * of {@link #nextStartPosition()}. However, most payloads are loaded lazily, * so if the payload data for the current position is not needed, - * this method may not be called at all for performance reasons. An ordered - * SpanQuery does not lazy load, so if you have payloads in your index and - * you do not want ordered SpanNearQuerys to collect payloads, you can - * disable collection with a constructor option.
+ * this method may not be called at all for performance reasons. *
* Note that the return type is a collection, thus the ordering should not be relied upon. *
@@ -76,25 +66,35 @@ public abstract class Spans { * @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false * @throws IOException if there is a low-level I/O error */ - // TODO: Remove warning after API has been finalized public abstract Collection getPayload() throws IOException; /** - * Checks if a payload can be loaded at this position. + * Checks if a payload can be loaded at the current start/end position. *

* Payloads can only be loaded once per call to - * {@link #next()}. + * {@link #nextStartPosition()}. * - * @return true if there is a payload available at this position that can be loaded + * @return true if there is a payload available at this start/end position + * that can be loaded */ public abstract boolean isPayloadAvailable() throws IOException; - + /** - * Returns the estimated cost of this spans. - *

- * This is generally an upper bound of the number of documents this iterator - * might match, but may be a rough heuristic, hardcoded value, or otherwise - * completely inaccurate. + * Optional method: Return a {@link TwoPhaseIterator} view of this + * {@link Spans}. A return value of {@code null} indicates that + * two-phase iteration is not supported. + * + * Note that the returned {@link TwoPhaseIterator}'s + * {@link TwoPhaseIterator#approximation() approximation} must + * advance synchronously with this iterator: advancing the approximation must + * advance this iterator and vice-versa. + * + * Implementing this method is typically useful on {@link Spans}s + * that have a high per-document overhead in order to confirm matches. + * + * The default implementation returns {@code null}. */ - public abstract long cost(); + public TwoPhaseIterator asTwoPhaseIterator() { + return null; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java index bca88de5ef2..5351b3d5513 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java @@ -24,10 +24,12 @@ import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.util.Collections; import java.util.Collection; +import java.util.Objects; /** * Expert: - * Public for extension only + * Public for extension only. + * This does not work correctly for terms that are indexed at position Integer.MAX_VALUE. 
*/ public class TermSpans extends Spans { protected final PostingsEnum postings; @@ -39,65 +41,67 @@ public class TermSpans extends Spans { protected boolean readPayload; public TermSpans(PostingsEnum postings, Term term) { - this.postings = postings; - this.term = term; - doc = -1; - } - - // only for EmptyTermSpans (below) - TermSpans() { - term = null; - postings = null; + this.postings = Objects.requireNonNull(postings); + this.term = Objects.requireNonNull(term); + this.doc = -1; + this.position = -1; } @Override - public boolean next() throws IOException { - if (count == freq) { - if (postings == null) { - return false; - } - doc = postings.nextDoc(); - if (doc == DocIdSetIterator.NO_MORE_DOCS) { - return false; - } + public int nextDoc() throws IOException { + doc = postings.nextDoc(); + if (doc != DocIdSetIterator.NO_MORE_DOCS) { freq = postings.freq(); + assert freq >= 1; count = 0; } - position = postings.nextPosition(); - count++; - readPayload = false; - return true; - } - - @Override - public boolean skipTo(int target) throws IOException { - assert target > doc; - doc = postings.advance(target); - if (doc == DocIdSetIterator.NO_MORE_DOCS) { - return false; - } - - freq = postings.freq(); - count = 0; - position = postings.nextPosition(); - count++; - readPayload = false; - return true; - } - - @Override - public int doc() { + position = -1; return doc; } @Override - public int start() { + public int advance(int target) throws IOException { + assert target > doc; + doc = postings.advance(target); + if (doc != DocIdSetIterator.NO_MORE_DOCS) { + freq = postings.freq(); + assert freq >= 1; + count = 0; + } + position = -1; + return doc; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextStartPosition() throws IOException { + if (count == freq) { + assert position != NO_MORE_POSITIONS; + return position = NO_MORE_POSITIONS; + } + int prevPosition = position; + position = postings.nextPosition(); + assert position >= 
prevPosition : "prevPosition="+prevPosition+" > position="+position; + assert position != NO_MORE_POSITIONS; // int endPosition not possible + count++; + readPayload = false; return position; } @Override - public int end() { - return position + 1; + public int startPosition() { + return position; + } + + @Override + public int endPosition() { + return (position == -1) ? -1 + : (position != NO_MORE_POSITIONS) ? position + 1 + : NO_MORE_POSITIONS; } @Override @@ -105,7 +109,6 @@ public class TermSpans extends Spans { return postings.cost(); } - // TODO: Remove warning after API has been finalized @Override public Collection getPayload() throws IOException { final BytesRef payload = postings.getPayload(); @@ -120,7 +123,6 @@ public class TermSpans extends Spans { return Collections.singletonList(bytes); } - // TODO: Remove warning after API has been finalized @Override public boolean isPayloadAvailable() throws IOException { return readPayload == false && postings.getPayload() != null; @@ -129,55 +131,12 @@ public class TermSpans extends Spans { @Override public String toString() { return "spans(" + term.toString() + ")@" + - (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position); + (doc == -1 ? "START" : (doc == NO_MORE_DOCS) ? "ENDDOC" + : doc + " - " + (position == NO_MORE_POSITIONS ? 
"ENDPOS" : position)); } public PostingsEnum getPostings() { return postings; } - private static final class EmptyTermSpans extends TermSpans { - - @Override - public boolean next() { - return false; - } - - @Override - public boolean skipTo(int target) { - return false; - } - - @Override - public int doc() { - return DocIdSetIterator.NO_MORE_DOCS; - } - - @Override - public int start() { - return -1; - } - - @Override - public int end() { - return -1; - } - - @Override - public Collection getPayload() { - return null; - } - - @Override - public boolean isPayloadAvailable() { - return false; - } - - @Override - public long cost() { - return 0; - } - } - - public static final TermSpans EMPTY_TERM_SPANS = new EmptyTermSpans(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java b/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java index 20f20b0e626..8e98eb13812 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java @@ -18,14 +18,18 @@ /** * The calculus of spans. * - *

A span is a <doc,startPosition,endPosition> tuple.

+ *

A span is a <doc,startPosition,endPosition> tuple that is enumerated by + * class {@link org.apache.lucene.search.spans.Spans Spans}. + *

* *

The following span query operators are implemented: * *

    * *
  • A {@link org.apache.lucene.search.spans.SpanTermQuery SpanTermQuery} matches all spans - * containing a particular {@link org.apache.lucene.index.Term Term}.
  • + * containing a particular {@link org.apache.lucene.index.Term Term}. + * This should not be used for terms that are indexed at position Integer.MAX_VALUE. + * * *
  • A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} matches spans * which occur near one another, and can be used to implement things like diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java index d57c284df1f..74d7b7fa09f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/Version.java +++ b/lucene/core/src/java/org/apache/lucene/util/Version.java @@ -46,6 +46,13 @@ public final class Version { @Deprecated public static final Version LUCENE_5_1_0 = new Version(5, 1, 0); + /** + * Match settings and bugs in Lucene's 5.2.0 release. + * @deprecated Use latest + */ + @Deprecated + public static final Version LUCENE_5_2_0 = new Version(5, 2, 0); + /** Match settings and bugs in Lucene's 6.0 release. *

    * Use this to get the latest & greatest settings, bug diff --git a/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java b/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java index b9b1f22c94c..ba8c8b746b0 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java @@ -217,6 +217,9 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { public void testNoWaitClose() throws IOException { Directory directory = newDirectory(); + if (directory instanceof MockDirectoryWrapper) { + ((MockDirectoryWrapper) directory).setPreventDoubleWrite(false); + } Document doc = new Document(); Field idField = newStringField("id", "", Field.Store.YES); doc.add(idField); @@ -248,7 +251,6 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { // stress out aborting them on close: ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3); writer.addDocument(doc); - writer.commit(); try { writer.commit(); @@ -267,7 +269,8 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { setOpenMode(OpenMode.APPEND). setMergePolicy(newLogMergePolicy(100)). // Force excessive merging: - setMaxBufferedDocs(2) + setMaxBufferedDocs(2). 
+ setCommitOnClose(false) ); } writer.close(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java index 1057e4ffff0..28ce0d7eb58 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java @@ -172,7 +172,7 @@ public class TestFieldsReader extends LuceneTestCase { try { i.seek(getFilePointer()); } catch (IOException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } return i; } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java new file mode 100644 index 00000000000..10023506298 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java @@ -0,0 +1,71 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestIndexWriterDeleteByQuery extends LuceneTestCase { + + // LUCENE-6379 + public void testDeleteMatchAllDocsQuery() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); + Document doc = new Document(); + // Norms are disabled: + doc.add(newStringField("field", "foo", Field.Store.NO)); + w.addDocument(doc); + DirectoryReader r = DirectoryReader.open(w, true); + FieldInfo fi = MultiFields.getMergedFieldInfos(r).fieldInfo("field"); + assertNotNull(fi); + assertFalse(fi.hasNorms()); + assertEquals(1, r.numDocs()); + assertEquals(1, r.maxDoc()); + + w.deleteDocuments(new MatchAllDocsQuery()); + DirectoryReader r2 = DirectoryReader.openIfChanged(r); + r.close(); + + assertNotNull(r2); + assertEquals(0, r2.numDocs()); + assertEquals(0, r2.maxDoc()); + + // Confirm the omitNorms bit is in fact no longer set: + doc = new Document(); + // Norms are enabled: + doc.add(newTextField("field", "foo", Field.Store.NO)); + w.addDocument(doc); + + DirectoryReader r3 = DirectoryReader.openIfChanged(r2); + r2.close(); + assertNotNull(r3); + assertEquals(1, r3.numDocs()); + assertEquals(1, r3.maxDoc()); + + // Make sure norms can come back to life for a field after deleting by MatchAllDocsQuery: + fi = MultiFields.getMergedFieldInfos(r3).fieldInfo("field"); + assertNotNull(fi); + assertTrue(fi.hasNorms()); + r3.close(); + w.close(); + dir.close(); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index 710827325d1..dc1b2f308cd 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ 
b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -238,18 +238,20 @@ public class TestPositionIncrement extends LuceneTestCase { if (VERBOSE) { System.out.println("\ngetPayloadSpans test"); } - Spans pspans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); - while (pspans.next()) { - if (VERBOSE) { - System.out.println("doc " + pspans.doc() + ": span " + pspans.start() - + " to " + pspans.end()); - } - Collection payloads = pspans.getPayload(); - sawZero |= pspans.start() == 0; - for (byte[] bytes : payloads) { - count++; + Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); + while (pspans.nextDoc() != Spans.NO_MORE_DOCS) { + while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if (VERBOSE) { - System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8)); + System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition() + + " to " + pspans.endPosition()); + } + Collection payloads = pspans.getPayload(); + sawZero |= pspans.startPosition() == 0; + for (byte[] bytes : payloads) { + count++; + if (VERBOSE) { + System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8)); + } } } } @@ -257,20 +259,20 @@ public class TestPositionIncrement extends LuceneTestCase { assertEquals(5, count); // System.out.println("\ngetSpans test"); - Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); + Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); count = 0; sawZero = false; - while (spans.next()) { - count++; - sawZero |= spans.start() == 0; - // System.out.println(spans.doc() + " - " + spans.start() + " - " + - // spans.end()); + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + count++; + sawZero |= spans.startPosition() == 0; + // System.out.println(spans.doc() + " - " + spans.start() + " - " + + // spans.end()); + } } assertEquals(4, count); assertTrue(sawZero); - // 
System.out.println("\nPayloadSpanUtil test"); - sawZero = false; PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext()); Collection pls = psu.getPayloadsForQuery(snq); diff --git a/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java b/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java index 80fe83fc6cf..9f9f887de45 100644 --- a/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java @@ -160,7 +160,7 @@ public class TestPayloadTermQuery extends LuceneTestCase { assertTrue(doc.score + " does not equal: " + 1, doc.score == 1); } CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true); - Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query); + Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query); assertTrue("spans is null and it shouldn't be", spans != null); /*float score = hits.score(0); for (int i =1; i < hits.length(); i++) @@ -211,13 +211,15 @@ public class TestPayloadTermQuery extends LuceneTestCase { } assertTrue(numTens + " does not equal: " + 10, numTens == 10); CheckHits.checkExplanations(query, "field", searcher, true); - Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query); + Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query); assertTrue("spans is null and it shouldn't be", spans != null); //should be two matches per document int count = 0; //100 hits times 2 matches per hit, we should have 200 in count - while (spans.next()) { - count++; + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + count++; + } } assertTrue(count + " does not equal: " + 200, count == 200); } @@ -253,13 +255,15 @@ public class TestPayloadTermQuery extends LuceneTestCase { } assertTrue(numTens + " does not equal: " + 10, numTens == 10); 
CheckHits.checkExplanations(query, "field", searcher, true); - Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query); + Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query); assertTrue("spans is null and it shouldn't be", spans != null); //should be two matches per document int count = 0; //100 hits times 2 matches per hit, we should have 200 in count - while (spans.next()) { - count++; + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + count++; + } } reader.close(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 56afd7eb027..1fec61daf28 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -24,7 +24,6 @@ import java.util.Map; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; @@ -42,27 +41,32 @@ final class JustCompileSearchSpans { static final class JustCompileSpans extends Spans { @Override - public int doc() { + public int docID() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public int end() { + public int nextDoc() throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public boolean next() { + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int startPosition() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public boolean skipTo(int target) { + public int endPosition() { throw new 
UnsupportedOperationException(UNSUPPORTED_MSG); } - + @Override - public int start() { + public int nextStartPosition() throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -103,6 +107,36 @@ final class JustCompileSearchSpans { static final class JustCompilePayloadSpans extends Spans { + @Override + public int docID() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int nextDoc() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int startPosition() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int endPosition() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int nextStartPosition() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + @Override public Collection getPayload() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); @@ -113,31 +147,6 @@ final class JustCompileSearchSpans { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - @Override - public int doc() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public int end() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public boolean next() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public boolean skipTo(int target) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public int start() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - @Override public long cost() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); @@ -147,7 +156,7 @@ final class JustCompileSearchSpans { static final class JustCompileSpanScorer extends SpanScorer { - protected 
JustCompileSpanScorer(Spans spans, Weight weight, + protected JustCompileSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException { super(spans, weight, docScorer); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java b/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java index 7490c61466e..3c20d6b73ef 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java @@ -18,19 +18,18 @@ package org.apache.lucene.search.spans; */ import java.io.IOException; -import java.util.Collection; -import java.util.Collections; import java.util.HashMap; -import java.util.List; +import java.util.HashSet; import java.util.Map; -import java.util.TreeSet; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; -import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.Bits; /** * @@ -39,141 +38,20 @@ import org.apache.lucene.search.DocIdSetIterator; * NOTE: This should be used for testing purposes only * @lucene.internal */ -public class MultiSpansWrapper extends Spans { // can't be package private due to payloads +public class MultiSpansWrapper { - private SpanQuery query; - private List leaves; - private int leafOrd = 0; - private Spans current; - private Map termContexts; - private final int numLeaves; - - private MultiSpansWrapper(List leaves, SpanQuery query, Map termContexts) { - this.query = query; - this.leaves = leaves; - this.numLeaves = leaves.size(); - this.termContexts = termContexts; - } - - 
public static Spans wrap(IndexReaderContext topLevelReaderContext, SpanQuery query) throws IOException { + public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException { + LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing + LeafReaderContext lrContext = lr.getContext(); + Query rewrittenQuery = spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly + HashSet termSet = new HashSet<>(); + rewrittenQuery.extractTerms(termSet); Map termContexts = new HashMap<>(); - TreeSet terms = new TreeSet<>(); - query.extractTerms(terms); - for (Term term : terms) { - termContexts.put(term, TermContext.build(topLevelReaderContext, term)); + for (Term term: termSet) { + TermContext termContext = TermContext.build(lrContext, term); + termContexts.put(term, termContext); } - final List leaves = topLevelReaderContext.leaves(); - if(leaves.size() == 1) { - final LeafReaderContext ctx = leaves.get(0); - return query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts); - } - return new MultiSpansWrapper(leaves, query, termContexts); + Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts); + return actSpans; } - - @Override - public boolean next() throws IOException { - if (leafOrd >= numLeaves) { - return false; - } - if (current == null) { - final LeafReaderContext ctx = leaves.get(leafOrd); - current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts); - } - while(true) { - if (current.next()) { - return true; - } - if (++leafOrd < numLeaves) { - final LeafReaderContext ctx = leaves.get(leafOrd); - current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts); - } else { - current = null; - break; - } - } - return false; - } - - @Override - public boolean skipTo(int target) throws IOException { - if (leafOrd >= numLeaves) { - return false; - } - - int subIndex = ReaderUtil.subIndex(target, leaves); - assert subIndex >= leafOrd; - if 
(subIndex != leafOrd) { - final LeafReaderContext ctx = leaves.get(subIndex); - current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts); - leafOrd = subIndex; - } else if (current == null) { - final LeafReaderContext ctx = leaves.get(leafOrd); - current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts); - } - while (true) { - if (target < leaves.get(leafOrd).docBase) { - // target was in the previous slice - if (current.next()) { - return true; - } - } else if (current.skipTo(target - leaves.get(leafOrd).docBase)) { - return true; - } - if (++leafOrd < numLeaves) { - final LeafReaderContext ctx = leaves.get(leafOrd); - current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts); - } else { - current = null; - break; - } - } - - return false; - } - - @Override - public int doc() { - if (current == null) { - return DocIdSetIterator.NO_MORE_DOCS; - } - return current.doc() + leaves.get(leafOrd).docBase; - } - - @Override - public int start() { - if (current == null) { - return DocIdSetIterator.NO_MORE_DOCS; - } - return current.start(); - } - - @Override - public int end() { - if (current == null) { - return DocIdSetIterator.NO_MORE_DOCS; - } - return current.end(); - } - - @Override - public Collection getPayload() throws IOException { - if (current == null) { - return Collections.emptyList(); - } - return current.getPayload(); - } - - @Override - public boolean isPayloadAvailable() throws IOException { - if (current == null) { - return false; - } - return current.isPayloadAvailable(); - } - - @Override - public long cost() { - return Integer.MAX_VALUE; // just for tests - } - } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java index 068964a2289..f5a51e6b88c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java @@ -651,47 
+651,6 @@ public class TestBasics extends LuceneTestCase { 1746, 1747, 1756, 1757, 1766, 1767, 1776, 1777, 1786, 1787, 1796, 1797}); } - @Test - public void testSpansSkipTo() throws Exception { - SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy")); - SpanTermQuery t2 = new SpanTermQuery(new Term("field", "seventy")); - Spans s1 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t1); - Spans s2 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t2); - - assertTrue(s1.next()); - assertTrue(s2.next()); - - boolean hasMore = true; - - do { - hasMore = skipToAccordingToJavaDocs(s1, s1.doc() + 1); - assertEquals(hasMore, s2.skipTo(s2.doc() + 1)); - assertEquals(s1.doc(), s2.doc()); - } while (hasMore); - } - - /** Skips to the first match beyond the current, whose document number is - * greater than or equal to target.

    Returns true iff there is such - * a match.

    Behaves as if written:

    -   *   boolean skipTo(int target) {
    -   *     do {
    -   *       if (!next())
    -   *       return false;
    -   *     } while (target > doc());
    -   *     return true;
    -   *   }
    -   * 
    - */ - private boolean skipToAccordingToJavaDocs(Spans s, int target) - throws Exception { - do { - if (!s.next()) - return false; - } while (target > s.doc()); - return true; - - } - private void checkHits(Query query, int[] results) throws IOException { CheckHits.checkHits(random(), query, "field", searcher, results); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index e05832b992b..677749b9aad 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -258,37 +258,19 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { SpanQuery q2 = new SpanTermQuery(new Term("first", "james")); SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender")); check(q, new int[] { 0, 1, 2, 3, 4 }); - - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - - assertEquals(true, span.next()); - assertEquals(s(0,0,1), s(span)); - assertEquals(true, span.next()); - assertEquals(s(1,0,1), s(span)); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); - assertEquals(true, span.next()); - assertEquals(s(1,1,2), s(span)); - - assertEquals(true, span.next()); - assertEquals(s(2,0,1), s(span)); - - assertEquals(true, span.next()); - assertEquals(s(2,1,2), s(span)); - - assertEquals(true, span.next()); - assertEquals(s(2,2,3), s(span)); - - assertEquals(true, span.next()); - assertEquals(s(3,0,1), s(span)); - - assertEquals(true, span.next()); - assertEquals(s(4,0,1), s(span)); - - assertEquals(true, span.next()); - assertEquals(s(4,1,2), s(span)); - - assertEquals(false, span.next()); + TestSpans.tstNextSpans(span, 0,0,1); + TestSpans.tstNextSpans(span, 1,0,1); + TestSpans.tstNextSpans(span, 1,1,2); + TestSpans.tstNextSpans(span, 2,0,1); + 
TestSpans.tstNextSpans(span, 2,1,2); + TestSpans.tstNextSpans(span, 2,2,3); + TestSpans.tstNextSpans(span, 3,0,1); + TestSpans.tstNextSpans(span, 4,0,1); + TestSpans.tstNextSpans(span, 4,1,2); + TestSpans.tstEndSpans(span); } public void testSpans1() throws Exception { @@ -300,19 +282,22 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { check(qA, new int[] { 0, 1, 2, 4 }); check(qB, new int[] { 0, 1, 2, 4 }); - Spans spanA = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qA); - Spans spanB = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qB); + Spans spanA = MultiSpansWrapper.wrap(searcher.getIndexReader(), qA); + Spans spanB = MultiSpansWrapper.wrap(searcher.getIndexReader(), qB); - while (spanA.next()) { - assertTrue("spanB not still going", spanB.next()); - assertEquals("spanA not equal spanB", s(spanA), s(spanB)); + while (spanA.nextDoc() != Spans.NO_MORE_DOCS) { + assertNotSame("spanB not still going", Spans.NO_MORE_DOCS, spanB.nextDoc()); + while (spanA.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + assertEquals("spanB start position", spanA.startPosition(), spanB.nextStartPosition()); + assertEquals("spanB end position", spanA.endPosition(), spanB.endPosition()); + } + assertEquals("spanB start position", Spans.NO_MORE_POSITIONS, spanB.nextStartPosition()); } - assertTrue("spanB still going even tough spanA is done", !(spanB.next())); - + assertEquals("spanB end doc", Spans.NO_MORE_DOCS, spanB.nextDoc()); } public void testSpans2() throws Exception { - assumeTrue("Broken scoring: LUCENE-3723", + assumeTrue("Broken scoring: LUCENE-3723", searcher.getSimilarity() instanceof TFIDFSimilarity); SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female")); SpanQuery qA2 = new SpanTermQuery(new Term("first", "james")); @@ -322,30 +307,17 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { { new FieldMaskingSpanQuery(qA, "id"), new FieldMaskingSpanQuery(qB, "id") }, -1, false ); check(q, new int[] { 0, 1, 2, 3 
}); - - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - - assertEquals(true, span.next()); - assertEquals(s(0,0,1), s(span)); - assertEquals(true, span.next()); - assertEquals(s(1,1,2), s(span)); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); - assertEquals(true, span.next()); - assertEquals(s(2,0,1), s(span)); - - assertEquals(true, span.next()); - assertEquals(s(2,2,3), s(span)); - - assertEquals(true, span.next()); - assertEquals(s(3,0,1), s(span)); - - assertEquals(false, span.next()); + TestSpans.tstNextSpans(span, 0,0,1); + TestSpans.tstNextSpans(span, 1,1,2); + TestSpans.tstNextSpans(span, 2,0,1); + TestSpans.tstNextSpans(span, 2,2,3); + TestSpans.tstNextSpans(span, 3,0,1); + TestSpans.tstEndSpans(span); } - public String s(Spans span) { - return s(span.doc(), span.start(), span.end()); - } public String s(int doc, int start, int end) { return "s(" + doc + "," + start + "," + end +")"; } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 5eb29cf92aa..1af6bf1fda4 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -106,7 +106,7 @@ public class TestNearSpansOrdered extends LuceneTestCase { } public String s(Spans span) { - return s(span.doc(), span.start(), span.end()); + return s(span.docID(), span.startPosition(), span.endPosition()); } public String s(int doc, int start, int end) { return "s(" + doc + "," + start + "," + end +")"; @@ -114,12 +114,10 @@ public class TestNearSpansOrdered extends LuceneTestCase { public void testNearSpansNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - assertEquals(true, span.next()); - assertEquals(s(0,0,3), s(span)); - assertEquals(true, 
span.next()); - assertEquals(s(1,0,4), s(span)); - assertEquals(false, span.next()); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); + TestSpans.tstNextSpans(span,0,0,3); + TestSpans.tstNextSpans(span,1,0,4); + TestSpans.tstEndSpans(span); } /** @@ -127,51 +125,58 @@ public class TestNearSpansOrdered extends LuceneTestCase { * same as next -- it's only applicable in this case since we know doc * does not contain more than one span */ - public void testNearSpansSkipToLikeNext() throws Exception { + public void testNearSpansAdvanceLikeNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - assertEquals(true, span.skipTo(0)); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); + assertEquals(0, span.advance(0)); + assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); - assertEquals(true, span.skipTo(1)); + assertEquals(1, span.advance(1)); + assertEquals(0, span.nextStartPosition()); assertEquals(s(1,0,4), s(span)); - assertEquals(false, span.skipTo(2)); + assertEquals(Spans.NO_MORE_DOCS, span.advance(2)); } - public void testNearSpansNextThenSkipTo() throws Exception { + public void testNearSpansNextThenAdvance() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - assertEquals(true, span.next()); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); + assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); + assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); - assertEquals(true, span.skipTo(1)); + assertNotSame(Spans.NO_MORE_DOCS, span.advance(1)); + assertEquals(0, span.nextStartPosition()); assertEquals(s(1,0,4), s(span)); - assertEquals(false, span.next()); + assertEquals(Spans.NO_MORE_DOCS, span.nextDoc()); } - public void testNearSpansNextThenSkipPast() throws Exception { + public void testNearSpansNextThenAdvancePast() 
throws Exception { SpanNearQuery q = makeQuery(); - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - assertEquals(true, span.next()); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); + assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); + assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); - assertEquals(false, span.skipTo(2)); + assertEquals(Spans.NO_MORE_DOCS, span.advance(2)); } - public void testNearSpansSkipPast() throws Exception { + public void testNearSpansAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - assertEquals(false, span.skipTo(2)); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); + assertEquals(Spans.NO_MORE_DOCS, span.advance(2)); } - public void testNearSpansSkipTo0() throws Exception { + public void testNearSpansAdvanceTo0() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - assertEquals(true, span.skipTo(0)); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); + assertEquals(0, span.advance(0)); + assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); } - public void testNearSpansSkipTo1() throws Exception { + public void testNearSpansAdvanceTo1() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); - assertEquals(true, span.skipTo(1)); + Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q); + assertEquals(1, span.advance(1)); + assertEquals(0, span.nextStartPosition()); assertEquals(s(1,0,4), s(span)); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java index 3d02ea55974..134fc238358 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java @@ -67,12 +67,12 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq; Spans spans; stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy")); - spans = MultiSpansWrapper.wrap(indexReader.getContext(), stq); + spans = MultiSpansWrapper.wrap(indexReader, stq); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 100, 1, 1, 1); stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy")); - spans = MultiSpansWrapper.wrap(indexReader.getContext(), stq); + spans = MultiSpansWrapper.wrap(indexReader, stq); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 100, 0, 0, 0); } @@ -83,7 +83,7 @@ public class TestPayloadSpans extends LuceneTestCase { SpanFirstQuery sfq; match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); sfq = new SpanFirstQuery(match, 2); - Spans spans = MultiSpansWrapper.wrap(indexReader.getContext(), sfq); + Spans spans = MultiSpansWrapper.wrap(indexReader, sfq); checkSpans(spans, 109, 1, 1, 1); //Test more complicated subclause SpanQuery[] clauses = new SpanQuery[2]; @@ -91,11 +91,11 @@ public class TestPayloadSpans extends LuceneTestCase { clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred")); match = new SpanNearQuery(clauses, 0, true); sfq = new SpanFirstQuery(match, 2); - checkSpans(MultiSpansWrapper.wrap(indexReader.getContext(), sfq), 100, 2, 1, 1); + checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1); match = new SpanNearQuery(clauses, 0, false); sfq = new SpanFirstQuery(match, 2); - checkSpans(MultiSpansWrapper.wrap(indexReader.getContext(), sfq), 100, 2, 1, 1); + checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1); } @@ -119,7 +119,7 @@ public class TestPayloadSpans extends LuceneTestCase { writer.close(); - 
checkSpans(MultiSpansWrapper.wrap(reader.getContext(), snq), 1,new int[]{2}); + checkSpans(MultiSpansWrapper.wrap(reader, snq), 1,new int[]{2}); reader.close(); directory.close(); } @@ -129,10 +129,8 @@ public class TestPayloadSpans extends LuceneTestCase { Spans spans; IndexSearcher searcher = getSearcher(); stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark")); - spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), stq); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 0, null); - + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq); + assertNull(spans); SpanQuery[] clauses = new SpanQuery[3]; clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); @@ -140,7 +138,7 @@ public class TestPayloadSpans extends LuceneTestCase { clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false); - spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{3,3}); @@ -151,7 +149,7 @@ public class TestPayloadSpans extends LuceneTestCase { spanNearQuery = new SpanNearQuery(clauses, 6, true); - spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 1, new int[]{3}); @@ -174,7 +172,7 @@ public class TestPayloadSpans extends LuceneTestCase { // yy within 6 of xx within 6 of rr - spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{3,3}); 
closeIndexReader.close(); @@ -205,7 +203,7 @@ public class TestPayloadSpans extends LuceneTestCase { clauses3[1] = snq; SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); - spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 1, new int[]{3}); @@ -243,7 +241,7 @@ public class TestPayloadSpans extends LuceneTestCase { SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); - spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery); + spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{8, 8}); closeIndexReader.close(); @@ -267,16 +265,18 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 1, true); - Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); + Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet<>(); for (int i = 0; i < topDocs.scoreDocs.length; i++) { - while (spans.next()) { - Collection payloads = spans.getPayload(); - - for (final byte [] payload : payloads) { - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + Collection payloads = spans.getPayload(); + + for (final byte [] payload : payloads) { + payloadSet.add(new String(payload, StandardCharsets.UTF_8)); + } } } } @@ -303,15 +303,18 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); 
SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); - Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); + Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet<>(); for (int i = 0; i < topDocs.scoreDocs.length; i++) { - while (spans.next()) { - Collection payloads = spans.getPayload(); - for (final byte[] payload : payloads) { - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + Collection payloads = spans.getPayload(); + + for (final byte [] payload : payloads) { + payloadSet.add(new String(payload, StandardCharsets.UTF_8)); + } } } } @@ -338,16 +341,18 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); - Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); + Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet<>(); for (int i = 0; i < topDocs.scoreDocs.length; i++) { - while (spans.next()) { - Collection payloads = spans.getPayload(); - - for (final byte [] payload : payloads) { - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + Collection payloads = spans.getPayload(); + + for (final byte [] payload : payloads) { + payloadSet.add(new String(payload, StandardCharsets.UTF_8)); + } } } } @@ -395,31 +400,22 @@ public class TestPayloadSpans extends LuceneTestCase { //each position match should have a span associated with it, since there is just one underlying term query, there should //only be one entry in the span int seen = 0; - while 
(spans.next() == true) - { - //if we expect payloads, then isPayloadAvailable should be true - if (expectedNumPayloads > 0) { - assertTrue("isPayloadAvailable is not returning the correct value: " + spans.isPayloadAvailable() - + " and it should be: " + (expectedNumPayloads > 0), - spans.isPayloadAvailable() == true); - } else { - assertTrue("isPayloadAvailable should be false", spans.isPayloadAvailable() == false); - } - //See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - assertTrue("payload Size: " + payload.size() + " is not: " + expectedNumPayloads, payload.size() == expectedNumPayloads); - for (final byte [] thePayload : payload) { - assertTrue("payload[0] Size: " + thePayload.length + " is not: " + expectedPayloadLength, - thePayload.length == expectedPayloadLength); - assertTrue(thePayload[0] + " does not equal: " + expectedFirstByte, thePayload[0] == expectedFirstByte); - + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + assertEquals("isPayloadAvailable should return true/false as payloads are expected", expectedNumPayloads > 0, spans.isPayloadAvailable()); + //See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token + if (spans.isPayloadAvailable()) { + Collection payload = spans.getPayload(); + assertEquals("payload size", expectedNumPayloads, payload.size()); + for (final byte [] thePayload : payload) { + assertEquals("payload length", expectedPayloadLength, thePayload.length); + assertEquals("payload first byte", expectedFirstByte, thePayload[0]); + } } - + seen++; } - seen++; } - assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans); + assertEquals("expectedNumSpans", expectedNumSpans, seen); } private IndexSearcher getSearcher() throws Exception { @@ -446,27 +442,28 @@ public class 
TestPayloadSpans extends LuceneTestCase { private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException { int cnt = 0; - while (spans.next() == true) { - if(VERBOSE) - System.out.println("\nSpans Dump --"); - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - if(VERBOSE) { - System.out.println("payloads for span:" + payload.size()); - for (final byte [] bytes : payload) { - System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " " - + new String(bytes, StandardCharsets.UTF_8)); + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + if(VERBOSE) + System.out.println("\nSpans Dump --"); + if (spans.isPayloadAvailable()) { + Collection payload = spans.getPayload(); + if(VERBOSE) { + System.out.println("payloads for span:" + payload.size()); + for (final byte [] bytes : payload) { + System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " " + + new String(bytes, StandardCharsets.UTF_8)); + } } + assertEquals("payload size", numPayloads[cnt], payload.size()); + } else { // no payload available + assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0", numPayloads.length > 0 && numPayloads[cnt] > 0 ); } - - assertEquals(numPayloads[cnt],payload.size()); - } else { - assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0",numPayloads.length > 0 && numPayloads[cnt] > 0 ); + cnt++; } - cnt++; } - assertEquals(numSpans, cnt); + assertEquals("expected numSpans", numSpans, cnt); } final class PayloadAnalyzer extends Analyzer { diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java index 577679d0a41..5a8bad99e24 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -22,7 
+22,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; @@ -201,117 +200,55 @@ public class TestSpans extends LuceneTestCase { makeSpanTermQuery("t3") }, slop, ordered); - Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq); + Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), snq); - assertTrue("first range", spans.next()); - assertEquals("first doc", 11, spans.doc()); - assertEquals("first start", 0, spans.start()); - assertEquals("first end", 4, spans.end()); + assertEquals("first doc", 11, spans.nextDoc()); + assertEquals("first start", 0, spans.nextStartPosition()); + assertEquals("first end", 4, spans.endPosition()); - assertTrue("second range", spans.next()); - assertEquals("second doc", 11, spans.doc()); - assertEquals("second start", 2, spans.start()); - assertEquals("second end", 6, spans.end()); + assertEquals("second start", 2, spans.nextStartPosition()); + assertEquals("second end", 6, spans.endPosition()); - assertFalse("third range", spans.next()); + tstEndSpans(spans); } - public void testSpanNearUnOrdered() throws Exception { - //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test - SpanNearQuery snq; - snq = new SpanNearQuery( + SpanNearQuery senq; + senq = new SpanNearQuery( new SpanQuery[] { makeSpanTermQuery("u1"), makeSpanTermQuery("u2") }, 0, false); - Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq); - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 4, spans.doc()); - assertEquals("start", 1, spans.start()); - assertEquals("end", 3, spans.end()); - - assertTrue("Does not have next and it 
should", spans.next()); - assertEquals("doc", 5, spans.doc()); - assertEquals("start", 2, spans.start()); - assertEquals("end", 4, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 8, spans.doc()); - assertEquals("start", 2, spans.start()); - assertEquals("end", 4, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 9, spans.doc()); - assertEquals("start", 0, spans.start()); - assertEquals("end", 2, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 10, spans.doc()); - assertEquals("start", 0, spans.start()); - assertEquals("end", 2, spans.end()); - assertTrue("Has next and it shouldn't: " + spans.doc(), spans.next() == false); + Spans spans = MultiSpansWrapper.wrap(reader, senq); + tstNextSpans(spans, 4, 1, 3); + tstNextSpans(spans, 5, 2, 4); + tstNextSpans(spans, 8, 2, 4); + tstNextSpans(spans, 9, 0, 2); + tstNextSpans(spans, 10, 0, 2); + tstEndSpans(spans); SpanNearQuery u1u2 = new SpanNearQuery(new SpanQuery[]{makeSpanTermQuery("u1"), makeSpanTermQuery("u2")}, 0, false); - snq = new SpanNearQuery( + senq = new SpanNearQuery( new SpanQuery[] { u1u2, makeSpanTermQuery("u2") }, 1, false); - spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq); - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 4, spans.doc()); - assertEquals("start", 0, spans.start()); - assertEquals("end", 3, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - //unordered spans can be subsets - assertEquals("doc", 4, spans.doc()); - assertEquals("start", 1, spans.start()); - assertEquals("end", 3, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 5, spans.doc()); - assertEquals("start", 0, spans.start()); - assertEquals("end", 4, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - 
assertEquals("doc", 5, spans.doc()); - assertEquals("start", 2, spans.start()); - assertEquals("end", 4, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 8, spans.doc()); - assertEquals("start", 0, spans.start()); - assertEquals("end", 4, spans.end()); - - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 8, spans.doc()); - assertEquals("start", 2, spans.start()); - assertEquals("end", 4, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 9, spans.doc()); - assertEquals("start", 0, spans.start()); - assertEquals("end", 2, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 9, spans.doc()); - assertEquals("start", 0, spans.start()); - assertEquals("end", 4, spans.end()); - - assertTrue("Does not have next and it should", spans.next()); - assertEquals("doc", 10, spans.doc()); - assertEquals("start", 0, spans.start()); - assertEquals("end", 2, spans.end()); - - assertTrue("Has next and it shouldn't", spans.next() == false); + spans = MultiSpansWrapper.wrap(reader, senq); + tstNextSpans(spans, 4, 0, 3); + tstNextSpans(spans, 4, 1, 3); // unordered spans can be subsets + tstNextSpans(spans, 5, 0, 4); + tstNextSpans(spans, 5, 2, 4); + tstNextSpans(spans, 8, 0, 4); + tstNextSpans(spans, 8, 2, 4); + tstNextSpans(spans, 9, 0, 2); + tstNextSpans(spans, 9, 0, 4); + tstNextSpans(spans, 10, 0, 2); + tstEndSpans(spans); } @@ -321,21 +258,40 @@ public class TestSpans extends LuceneTestCase { for (int i = 0; i < terms.length; i++) { sqa[i] = makeSpanTermQuery(terms[i]); } - return MultiSpansWrapper.wrap(searcher.getTopReaderContext(), new SpanOrQuery(sqa)); + return MultiSpansWrapper.wrap(searcher.getIndexReader(), new SpanOrQuery(sqa)); } - private void tstNextSpans(Spans spans, int doc, int start, int end) - throws Exception { - assertTrue("next", spans.next()); - assertEquals("doc", doc, 
spans.doc()); - assertEquals("start", start, spans.start()); - assertEquals("end", end, spans.end()); + public static void tstNextSpans(Spans spans, int doc, int start, int end) throws IOException { + if (spans.docID() >= doc) { + assertEquals("docId", doc, spans.docID()); + } else { // nextDoc needed before testing start/end + if (spans.docID() >= 0) { + assertEquals("nextStartPosition of previous doc", Spans.NO_MORE_POSITIONS, spans.nextStartPosition()); + assertEquals("endPosition of previous doc", Spans.NO_MORE_POSITIONS, spans.endPosition()); + } + assertEquals("nextDoc", doc, spans.nextDoc()); + if (doc != Spans.NO_MORE_DOCS) { + assertEquals("first startPosition", -1, spans.startPosition()); + assertEquals("first endPosition", -1, spans.endPosition()); + } + } + if (doc != Spans.NO_MORE_DOCS) { + assertEquals("nextStartPosition", start, spans.nextStartPosition()); + assertEquals("startPosition", start, spans.startPosition()); + assertEquals("endPosition", end, spans.endPosition()); + } + } + + public static void tstEndSpans(Spans spans) throws Exception { + if (spans != null) { // null Spans is empty + tstNextSpans(spans, Spans.NO_MORE_DOCS, -2, -2); // start and end positions will be ignored + } } public void testSpanOrEmpty() throws Exception { Spans spans = orSpans(new String[0]); - assertFalse("empty next", spans.next()); - + tstEndSpans(spans); + SpanOrQuery a = new SpanOrQuery(); SpanOrQuery b = new SpanOrQuery(); assertTrue("empty should equal", a.equals(b)); @@ -344,24 +300,7 @@ public class TestSpans extends LuceneTestCase { public void testSpanOrSingle() throws Exception { Spans spans = orSpans(new String[] {"w5"}); tstNextSpans(spans, 0, 4, 5); - assertFalse("final next", spans.next()); - } - - public void testSpanOrMovesForward() throws Exception { - Spans spans = orSpans(new String[] {"w1", "xx"}); - - spans.next(); - int doc = spans.doc(); - assertEquals(0, doc); - - spans.skipTo(0); - doc = spans.doc(); - - // LUCENE-1583: - // according to 
Spans, a skipTo to the same doc or less - // should still call next() on the underlying Spans - assertEquals(1, doc); - + tstEndSpans(spans); } public void testSpanOrDouble() throws Exception { @@ -370,17 +309,15 @@ public class TestSpans extends LuceneTestCase { tstNextSpans(spans, 2, 3, 4); tstNextSpans(spans, 3, 4, 5); tstNextSpans(spans, 7, 3, 4); - assertFalse("final next", spans.next()); + tstEndSpans(spans); } - public void testSpanOrDoubleSkip() throws Exception { + public void testSpanOrDoubleAdvance() throws Exception { Spans spans = orSpans(new String[] {"w5", "yy"}); - assertTrue("initial skipTo", spans.skipTo(3)); - assertEquals("doc", 3, spans.doc()); - assertEquals("start", 4, spans.start()); - assertEquals("end", 5, spans.end()); + assertEquals("initial advance", 3, spans.advance(3)); + tstNextSpans(spans, 3, 4, 5); tstNextSpans(spans, 7, 3, 4); - assertFalse("final next", spans.next()); + tstEndSpans(spans); } public void testSpanOrUnused() throws Exception { @@ -389,7 +326,7 @@ public class TestSpans extends LuceneTestCase { tstNextSpans(spans, 2, 3, 4); tstNextSpans(spans, 3, 4, 5); tstNextSpans(spans, 7, 3, 4); - assertFalse("final next", spans.next()); + tstEndSpans(spans); } public void testSpanOrTripleSameDoc() throws Exception { @@ -400,7 +337,7 @@ public class TestSpans extends LuceneTestCase { tstNextSpans(spans, 11, 3, 4); tstNextSpans(spans, 11, 4, 5); tstNextSpans(spans, 11, 5, 6); - assertFalse("final next", spans.next()); + tstEndSpans(spans); } public void testSpanScorerZeroSloppyFreq() throws Exception { @@ -439,8 +376,8 @@ public class TestSpans extends LuceneTestCase { assertEquals("first doc number", spanScorer.docID() + ctx.docBase, 11); float score = spanScorer.score(); assertTrue("first doc score should be zero, " + score, score == 0.0f); - } else { - assertTrue("no second doc", spanScorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS); + } else { + assertTrue("no second doc", spanScorer == null || spanScorer.nextDoc() == 
DocIdSetIterator.NO_MORE_DOCS); } } } @@ -542,11 +479,15 @@ public class TestSpans extends LuceneTestCase { SpanTermQuery iq = new SpanTermQuery(new Term(field, include)); SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude)); SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post); - Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq); + Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), snq); int i = 0; - while (spans.next()){ - i++; + if (spans != null) { + while (spans.nextDoc() != Spans.NO_MORE_DOCS){ + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + i++; + } + } } return i; } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpansEnum.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpansEnum.java new file mode 100644 index 00000000000..1f632ed3642 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpansEnum.java @@ -0,0 +1,187 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Tests Spans (v2) + * + */ +public class TestSpansEnum extends LuceneTestCase { + private static IndexSearcher searcher; + private static IndexReader reader; + private static Directory directory; + + static final class SimplePayloadFilter extends TokenFilter { + int pos; + final PayloadAttribute payloadAttr; + final CharTermAttribute termAttr; + + public SimplePayloadFilter(TokenStream input) { + super(input); + pos = 0; + payloadAttr = input.addAttribute(PayloadAttribute.class); + termAttr = input.addAttribute(CharTermAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes(StandardCharsets.UTF_8))); + pos++; + return true; + } else { + return false; + } + } + + @Override + public void reset() throws IOException { + super.reset(); + pos = 0; + } + } + + static Analyzer simplePayloadAnalyzer; + @BeforeClass + public static void beforeClass() throws Exception { + simplePayloadAnalyzer = new Analyzer() { + @Override + public 
TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer)); + } + }; + + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory, + newIndexWriterConfig(simplePayloadAnalyzer) + .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy())); + //writer.infoStream = System.out; + for (int i = 0; i < 10; i++) { + Document doc = new Document(); + doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES)); + writer.addDocument(doc); + } + for (int i = 100; i < 110; i++) { + Document doc = new Document(); // doc id 10-19 have 100-109 + doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES)); + writer.addDocument(doc); + } + reader = writer.getReader(); + searcher = newSearcher(reader); + writer.close(); + } + + @AfterClass + public static void afterClass() throws Exception { + reader.close(); + directory.close(); + searcher = null; + reader = null; + directory = null; + simplePayloadAnalyzer = null; + } + + private void checkHits(Query query, int[] results) throws IOException { + CheckHits.checkHits(random(), query, "field", searcher, results); + } + + SpanTermQuery spanTQ(String term) { + return new SpanTermQuery(new Term("field", term)); + } + + @Test + public void testSpansEnumOr1() throws Exception { + SpanTermQuery t1 = spanTQ("one"); + SpanTermQuery t2 = spanTQ("two"); + SpanOrQuery soq = new SpanOrQuery(t1, t2); + checkHits(soq, new int[] {1, 2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}); + } + + @Test + public void testSpansEnumOr2() throws Exception { + SpanTermQuery t1 = spanTQ("one"); + SpanTermQuery t11 = spanTQ("eleven"); + SpanOrQuery soq = new SpanOrQuery(t1, t11); + checkHits(soq, new int[] {1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}); + } + + @Test + public void testSpansEnumOr3() throws Exception { 
+ SpanTermQuery t12 = spanTQ("twelve"); + SpanTermQuery t11 = spanTQ("eleven"); + SpanOrQuery soq = new SpanOrQuery(t12, t11); + checkHits(soq, new int[] {}); + } + + @Test + public void testSpansEnumOrNot1() throws Exception { + SpanTermQuery t1 = spanTQ("one"); + SpanTermQuery t2 = spanTQ("two"); + SpanOrQuery soq = new SpanOrQuery(t1, t2); + SpanNotQuery snq = new SpanNotQuery(soq, t1); + checkHits(snq, new int[] {2,12}); + } + + @Test + public void testSpansEnumNotBeforeAfter1() throws Exception { + SpanTermQuery t1 = spanTQ("one"); + SpanTermQuery t100 = spanTQ("hundred"); + SpanNotQuery snq = new SpanNotQuery(t100, t1, 0, 0); + checkHits(snq, new int[] {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}); // include all "one hundred ..." + } + + @Test + public void testSpansEnumNotBeforeAfter2() throws Exception { + SpanTermQuery t1 = spanTQ("one"); + SpanTermQuery t100 = spanTQ("hundred"); + SpanNotQuery snq = new SpanNotQuery(t100, t1, 1, 0); + checkHits(snq, new int[] {}); // exclude all "one hundred ..." 
+ } + + @Test + public void testSpansEnumNotBeforeAfter3() throws Exception { + SpanTermQuery t1 = spanTQ("one"); + SpanTermQuery t100 = spanTQ("hundred"); + SpanNotQuery snq = new SpanNotQuery(t100, t1, 0, 1); + checkHits(snq, new int[] {10, 12, 13, 14, 15, 16, 17, 18, 19}); // exclude "one hundred one" + } +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/RandomSamplingFacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/RandomSamplingFacetsCollector.java index a7e3519087e..dcc366a43c3 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/RandomSamplingFacetsCollector.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/RandomSamplingFacetsCollector.java @@ -215,7 +215,7 @@ public class RandomSamplingFacetsCollector extends FacetsCollector { return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null); } catch (IOException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 849f7c6a2bf..80160ace1c9 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -308,10 +308,11 @@ public class WeightedSpanTermExtractor { final Spans spans = q.getSpans(context, acceptDocs, termContexts); // collect span positions - while (spans.next()) { - spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1)); + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1)); + } } - } if (spanPositions.size() == 0) { diff --git 
a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java index 0968dec1aaf..89b39bb414a 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java @@ -681,7 +681,7 @@ public class TestMultiTermHighlighting extends LuceneTestCase { } }; SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))); - Query query = new SpanNearQuery(new SpanQuery[] { childQuery }, 0, true); + Query query = new SpanNearQuery(new SpanQuery[] { childQuery, childQuery }, 0, false); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); String snippets[] = highlighter.highlight("body", query, searcher, topDocs); diff --git a/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java b/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java index d29260cf1b0..afd5a85be33 100644 --- a/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java +++ b/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java @@ -895,7 +895,7 @@ public class DocTermOrds implements Accountable { try { return getOrdTermsEnum(reader); } catch (IOException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java index 9eedbc6562c..fbdfaa869b4 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java @@ -145,7 +145,7 @@ public class 
DocumentValueSourceDictionary extends DocumentDictionary { try { currentWeightValues = weightsValueSource.getValues(new HashMap(), leaves.get(currentLeafIndex)); } catch (IOException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } return currentWeightValues.longVal(docId - starts[subIndex]); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java index 3ee0efe0ee1..0b7cfeccc76 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java @@ -113,7 +113,7 @@ public class FileDictionary implements Dictionary { try { return new FileIterator(); } catch (IOException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index ca0a0a2a944..5353ff2f466 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -2681,7 +2681,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } } } catch (Throwable e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } }; diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b38b2cc7b92..dd265b81e1a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -58,6 +58,9 @@ Other Changes * SOLR-6954: Deprecated SolrClient.shutdown() method removed (Alan Woodward) +================== 5.2.0 ================== +(No Changes) + ================== 5.1.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release @@ -91,15 +94,6 @@ Upgrading from Solr 5.0 contain 
some LGPL-only code. Until that's resolved by Tika, you can download the .jar yourself and place it under contrib/extraction/lib. -* A twitter engineer discovered a JVM bug that causes GC pause problems. The - workaround for those problems makes certain functionality impossible, such as - running the jstat program on your Solr instance. That workaround has been - implemented in the bin/solr start scripts. If you need the missing java - functionality, delete the "-XX:+PerfDisableSharedMem" parameter from - bin/solr.in.sh or bin/solr.in.cmd. - - http://www.evanjones.ca/jvm-mmap-pause.html - Detailed Change List ---------------------- @@ -350,11 +344,8 @@ Bug Fixes * SOLR-7309: Make bin/solr, bin/post work when Solr installation directory contains spaces (Ramkumar Aiyengar, Martijn Koster) -* SOLR-7319: Workaround for the "Four Month Bug" GC pause problem discovered - by a twitter software engineer. This causes GC pauses when JVM statistics - are left enabled and there is heavy MMAP write activity. 
- http://www.evanjones.ca/jvm-mmap-pause.html - (Shawn Heisey) +* SOLR-6924: The config API forcefully refreshes all replicas in the collection to ensure all are + updated (Noble Paul) Optimizations ---------------------- @@ -368,6 +359,9 @@ Optimizations * SOLR-7239: improved performance of min & max in StatsComponent, as well as situations where local params disable all stats (hossman) + * SOLR-7324: IndexFetcher does not need to call isIndexStale if full copy is already needed + (Stephan Lagraulet via Varun Thacker) + Other Changes ---------------------- diff --git a/solr/bin/solr.in.cmd b/solr/bin/solr.in.cmd index ff11f62da82..905284ebc2b 100644 --- a/solr/bin/solr.in.cmd +++ b/solr/bin/solr.in.cmd @@ -40,7 +40,6 @@ set GC_TUNE=-XX:NewRatio=3 ^ -XX:+UseCMSInitiatingOccupancyOnly ^ -XX:CMSInitiatingOccupancyFraction=50 ^ -XX:CMSMaxAbortablePrecleanTime=6000 ^ - -XX:+PerfDisableSharedMem ^ -XX:+CMSParallelRemarkEnabled ^ -XX:+ParallelRefProcEnabled diff --git a/solr/bin/solr.in.sh b/solr/bin/solr.in.sh index 9d33fca5d00..ccaea12f37e 100644 --- a/solr/bin/solr.in.sh +++ b/solr/bin/solr.in.sh @@ -37,7 +37,6 @@ GC_TUNE="-XX:NewRatio=3 \ -XX:PretenureSizeThreshold=64m \ -XX:+UseCMSInitiatingOccupancyOnly \ -XX:CMSInitiatingOccupancyFraction=50 \ --XX:+PerfDisableSharedMem \ -XX:CMSMaxAbortablePrecleanTime=6000 \ -XX:+CMSParallelRemarkEnabled \ -XX:+ParallelRefProcEnabled" diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java index 8e30d1a880e..e67efadbf49 100644 --- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java +++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java @@ -103,7 +103,7 @@ public class RegexRulesPasswordProvider implements PasswordProvider { } is.close(); } catch (IOException e) { - throw 
new RuntimeException(); + throw new RuntimeException(e); } return rules; } diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 15f1118770a..15f7c46e525 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -2175,9 +2175,10 @@ public final class ZkController { * * @return true on success */ - public static boolean persistConfigResourceToZooKeeper(ZkSolrResourceLoader zkLoader, int znodeVersion, + public static int persistConfigResourceToZooKeeper(ZkSolrResourceLoader zkLoader, int znodeVersion, String resourceName, byte[] content, boolean createIfNotExists) { + int latestVersion = znodeVersion; final ZkController zkController = zkLoader.getZkController(); final SolrZkClient zkClient = zkController.getZkClient(); final String resourceLocation = zkLoader.getConfigSetZkPath() + "/" + resourceName; @@ -2185,17 +2186,19 @@ public final class ZkController { try { try { zkClient.setData(resourceLocation, content, znodeVersion, true); + latestVersion = znodeVersion + 1;// if the set succeeded , it should have incremented the version by one always log.info("Persisted config data to node {} ", resourceLocation); touchConfDir(zkLoader); } catch (NoNodeException e) { if (createIfNotExists) { try { zkClient.create(resourceLocation, content, CreateMode.PERSISTENT, true); + latestVersion = 0;//just created so version must be zero touchConfDir(zkLoader); } catch (KeeperException.NodeExistsException nee) { try { Stat stat = zkClient.exists(resourceLocation, null, true); - log.info("failed to set data version in zk is {0} and expected version is {1} ", stat.getVersion(), znodeVersion); + log.info("failed to set data version in zk is {} and expected version is {} ", stat.getVersion(), znodeVersion); } catch (Exception e1) { log.warn("could not get stat"); } @@ -2227,7 +2230,7 @@ public final class ZkController { 
log.error(msg, e); throw new SolrException(ErrorCode.SERVER_ERROR, msg, e); } - return true; + return latestVersion; } public static void touchConfDir(ZkSolrResourceLoader zkLoader) { diff --git a/solr/core/src/java/org/apache/solr/core/ConfigOverlay.java b/solr/core/src/java/org/apache/solr/core/ConfigOverlay.java index 6ace75b92bf..46cbfa2e27d 100644 --- a/solr/core/src/java/org/apache/solr/core/ConfigOverlay.java +++ b/solr/core/src/java/org/apache/solr/core/ConfigOverlay.java @@ -187,14 +187,14 @@ public class ConfigOverlay implements MapSerializable { public static final String RESOURCE_NAME = "configoverlay.json"; - private static final Long STR_ATTR = 0L; + /*private static final Long STR_ATTR = 0L; private static final Long STR_NODE = 1L; private static final Long BOOL_ATTR = 10L; private static final Long BOOL_NODE = 11L; private static final Long INT_ATTR = 20L; private static final Long INT_NODE = 21L; private static final Long FLOAT_ATTR = 30L; - private static final Long FLOAT_NODE = 31L; + private static final Long FLOAT_NODE = 31L;*/ private static Map editable_prop_map; //The path maps to the xml xpath and value of 1 means it is a tag with a string value and value diff --git a/solr/core/src/java/org/apache/solr/core/RequestParams.java b/solr/core/src/java/org/apache/solr/core/RequestParams.java index 17f4123f841..aae2a0b804d 100644 --- a/solr/core/src/java/org/apache/solr/core/RequestParams.java +++ b/solr/core/src/java/org/apache/solr/core/RequestParams.java @@ -148,6 +148,7 @@ public class RequestParams implements MapSerializable { ZkSolrResourceLoader resourceLoader = (ZkSolrResourceLoader) loader; try { Stat stat = resourceLoader.getZkController().getZkClient().exists(resourceLoader.getConfigSetZkPath() + "/" + RequestParams.RESOURCE, null, true); + log.debug("latest version of {} in ZK is : {}", resourceLoader.getConfigSetZkPath() + "/" + RequestParams.RESOURCE, stat == null ? 
"": stat.getVersion()); if (stat == null) { requestParams = new RequestParams(Collections.EMPTY_MAP, -1); } else if (requestParams == null || stat.getVersion() > requestParams.getZnodeVersion()) { diff --git a/solr/core/src/java/org/apache/solr/core/SolrConfig.java b/solr/core/src/java/org/apache/solr/core/SolrConfig.java index 1126a919e54..121165ec8ff 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrConfig.java +++ b/solr/core/src/java/org/apache/solr/core/SolrConfig.java @@ -77,6 +77,7 @@ import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; +import static org.apache.solr.core.ConfigOverlay.ZNODEVER; import static org.apache.solr.core.SolrConfig.PluginOpts.LAZY; import static org.apache.solr.core.SolrConfig.PluginOpts.MULTI_OK; import static org.apache.solr.core.SolrConfig.PluginOpts.NOOP; @@ -819,7 +820,7 @@ public class SolrConfig extends Config implements MapSerializable { @Override public Map toMap() { LinkedHashMap result = new LinkedHashMap(); - if (getZnodeVersion() > -1) result.put("znodeVersion", getZnodeVersion()); + if (getZnodeVersion() > -1) result.put(ZNODEVER, getZnodeVersion()); result.put("luceneMatchVersion", luceneMatchVersion); result.put("updateHandler", getUpdateHandlerInfo().toMap()); Map m = new LinkedHashMap(); diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 4afaf8a65a7..3988f576723 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -66,6 +66,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; import org.apache.solr.client.solrj.impl.BinaryResponseParser; import org.apache.solr.cloud.CloudDescriptor; @@ -89,6 +90,7 @@ 
import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.admin.ShowFileRequestHandler; import org.apache.solr.handler.component.HighlightComponent; import org.apache.solr.handler.component.SearchComponent; +import org.apache.solr.logging.MDCUtils; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestHandler; import org.apache.solr.response.BinaryResponseWriter; @@ -132,7 +134,6 @@ import org.apache.solr.update.processor.RunUpdateProcessorFactory; import org.apache.solr.update.processor.UpdateRequestProcessorChain; import org.apache.solr.update.processor.UpdateRequestProcessorChain.ProcessorInfo; import org.apache.solr.update.processor.UpdateRequestProcessorFactory; -import org.apache.solr.util.ConcurrentLRUCache; import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.util.PropertiesInputStream; import org.apache.solr.util.RefCounted; @@ -691,6 +692,8 @@ public final class SolrCore implements SolrInfoMBean, Closeable { public SolrCore(String name, String dataDir, SolrConfig config, IndexSchema schema, CoreDescriptor cd, UpdateHandler updateHandler, IndexDeletionPolicyWrapper delPolicy, SolrCore prev) { coreDescriptor = cd; this.setName( name ); + MDCUtils.setCore(name); // show the core name in the error logs + resourceLoader = config.getResourceLoader(); this.solrConfig = config; @@ -2077,13 +2080,13 @@ public final class SolrCore implements SolrInfoMBean, Closeable { HashMap m= new HashMap<>(); m.put("xml", new XMLResponseWriter()); m.put("standard", m.get("xml")); - m.put("json", new JSONResponseWriter()); + m.put(CommonParams.JSON, new JSONResponseWriter()); m.put("python", new PythonResponseWriter()); m.put("php", new PHPResponseWriter()); m.put("phps", new PHPSerializedResponseWriter()); m.put("ruby", new RubyResponseWriter()); m.put("raw", new RawResponseWriter()); - m.put("javabin", new BinaryResponseWriter()); + m.put(CommonParams.JAVABIN, new BinaryResponseWriter()); 
m.put("csv", new CSVResponseWriter()); m.put("xsort", new SortingResponseWriter()); m.put("schema.xml", new SchemaXmlResponseWriter()); @@ -2463,12 +2466,12 @@ public final class SolrCore implements SolrInfoMBean, Closeable { zkSolrResourceLoader.getZkController().registerConfListenerForCore( zkSolrResourceLoader.getConfigSetZkPath(), this, - getListener(this, zkSolrResourceLoader)); + getConfListener(this, zkSolrResourceLoader)); } - private static Runnable getListener(SolrCore core, ZkSolrResourceLoader zkSolrResourceLoader) { + public static Runnable getConfListener(SolrCore core, ZkSolrResourceLoader zkSolrResourceLoader) { final String coreName = core.getName(); final CoreContainer cc = core.getCoreDescriptor().getCoreContainer(); final String overlayPath = zkSolrResourceLoader.getConfigSetZkPath() + "/" + ConfigOverlay.RESOURCE_NAME; @@ -2506,9 +2509,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable { cc.reload(coreName); return; } - //some files in conf directoy has changed other than schema.xml, - // solrconfig.xml. so fire event listeners - + //some files in conf directory may have other than managedschema, overlay, params try (SolrCore core = cc.solrCores.getCoreFromAnyList(coreName, true)) { if (core == null || core.isClosed()) return; for (Runnable listener : core.confListeners) { diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java index cbe07df7284..16057f842e1 100644 --- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java +++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java @@ -362,8 +362,10 @@ public class IndexFetcher { indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); try { - - if (isIndexStale(indexDir)) { + + //We will compare all the index files from the master vs the index files on disk to see if there is a mismatch + //in the metadata. 
If there is a mismatch for the same index file then we download the entire index again. + if (!isFullCopyNeeded && isIndexStale(indexDir)) { isFullCopyNeeded = true; } diff --git a/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java b/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java index 86571020bdc..b6fb550a551 100644 --- a/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java @@ -21,6 +21,7 @@ package org.apache.solr.handler; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -29,35 +30,63 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import com.google.common.collect.ImmutableSet; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrRequest; +import org.apache.solr.client.solrj.SolrResponse; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.cloud.ZkCLI; import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkSolrResourceLoader; import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CommonParams; import 
org.apache.solr.common.params.MapSolrParams; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ContentStream; +import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.StrUtils; import org.apache.solr.core.ConfigOverlay; import org.apache.solr.core.PluginInfo; import org.apache.solr.core.ImplicitPlugins; import org.apache.solr.core.RequestParams; import org.apache.solr.core.SolrConfig; +import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.handler.admin.CollectionsHandler; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestHandler; +import org.apache.solr.response.BinaryResponseWriter; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.SchemaManager; import org.apache.solr.util.CommandOperation; +import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static java.util.Collections.singletonList; +import static org.apache.solr.common.cloud.ZkNodeProps.makeMap; import static org.apache.solr.common.params.CoreAdminParams.NAME; import static org.apache.solr.common.util.StrUtils.formatString; import static org.apache.solr.core.ConfigOverlay.NOT_EDITABLE; +import static org.apache.solr.core.ConfigOverlay.ZNODEVER; import static org.apache.solr.core.SolrConfig.PluginOpts.REQUIRE_CLASS; import static org.apache.solr.core.SolrConfig.PluginOpts.REQUIRE_NAME; import static org.apache.solr.core.SolrConfig.PluginOpts.REQUIRE_NAME_IN_OVERLAY; @@ -67,6 +96,7 @@ public class SolrConfigHandler extends RequestHandlerBase { public static final Logger log = LoggerFactory.getLogger(SolrConfigHandler.class); public static final boolean configEditing_disabled = Boolean.getBoolean("disable.configEdit"); 
private static final Map namedPlugins; + private Lock reloadLock = new ReentrantLock(true); static { Map map = new HashMap<>(); @@ -99,7 +129,7 @@ public class SolrConfigHandler extends RequestHandlerBase { } - private static class Command { + private class Command { private final SolrQueryRequest req; private final SolrQueryResponse resp; private final String method; @@ -122,6 +152,7 @@ public class SolrConfigHandler extends RequestHandlerBase { private void handleGET() { if (parts.size() == 1) { + //this is the whole config. sent out the whole payload resp.add("config", getConfigDetails()); } else { if (ConfigOverlay.NAME.equals(parts.get(1))) { @@ -131,9 +162,9 @@ public class SolrConfigHandler extends RequestHandlerBase { RequestParams params = req.getCore().getSolrConfig().getRequestParams(); MapSolrParams p = params.getParams(parts.get(2)); Map m = new LinkedHashMap<>(); - m.put(ConfigOverlay.ZNODEVER, params.getZnodeVersion()); + m.put(ZNODEVER, params.getZnodeVersion()); if (p != null) { - m.put(RequestParams.NAME, ZkNodeProps.makeMap(parts.get(2), p.getMap())); + m.put(RequestParams.NAME, makeMap(parts.get(2), p.getMap())); } resp.add(SolrQueryResponse.NAME, m); } else { @@ -141,8 +172,53 @@ public class SolrConfigHandler extends RequestHandlerBase { } } else { - Map m = getConfigDetails(); - resp.add("config", ZkNodeProps.makeMap(parts.get(1), m.get(parts.get(1)))); + if (ZNODEVER.equals(parts.get(1))) { + resp.add(ZNODEVER, ZkNodeProps.makeMap( + ConfigOverlay.NAME, req.getCore().getSolrConfig().getOverlay().getZnodeVersion(), + RequestParams.NAME, req.getCore().getSolrConfig().getRequestParams().getZnodeVersion())); + boolean checkStale = false; + int expectedVersion = req.getParams().getInt(ConfigOverlay.NAME, -1); + int actualVersion = req.getCore().getSolrConfig().getOverlay().getZnodeVersion(); + if (expectedVersion > actualVersion) { + log.info("expecting overlay version {} but my version is {}", expectedVersion, actualVersion); + checkStale = 
true; + } else if (expectedVersion != -1) { + log.info("I already have the expected version {} of config", expectedVersion); + } + expectedVersion = req.getParams().getInt(RequestParams.NAME, -1); + actualVersion = req.getCore().getSolrConfig().getRequestParams().getZnodeVersion(); + if (expectedVersion > actualVersion) { + log.info("expecting params version {} but my version is {}", expectedVersion, actualVersion); + checkStale = true; + } else if (expectedVersion != -1) { + log.info("I already have the expected version {} of params", expectedVersion); + } + if (checkStale && req.getCore().getResourceLoader() instanceof ZkSolrResourceLoader) { + new Thread(SolrConfigHandler.class.getSimpleName() + "-refreshconf") { + @Override + public void run() { + if (!reloadLock.tryLock()) { + log.info("Another reload is in progress . Not doing anything"); + return; + } + try { + log.info("Trying to update my configs"); + SolrCore.getConfListener(req.getCore(), (ZkSolrResourceLoader) req.getCore().getResourceLoader()).run(); + } catch (Exception e) { + log.error("Unable to refresh conf ", e); + } finally { + reloadLock.unlock(); + } + } + }.start(); + } else { + log.info("checkStale {} , resourceloader {}", checkStale, req.getCore().getResourceLoader().getClass().getName()); + } + + } else { + Map m = getConfigDetails(); + resp.add("config", makeMap(parts.get(1), m.get(parts.get(1)))); + } } } } @@ -277,8 +353,15 @@ public class SolrConfigHandler extends RequestHandlerBase { if (ops.isEmpty()) { ZkController.touchConfDir(zkLoader); } else { - ZkController.persistConfigResourceToZooKeeper(zkLoader, params.getZnodeVersion(), - RequestParams.RESOURCE, params.toByteArray(), true); + log.info("persisting params version : {}", params.toMap()); + int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader, + params.getZnodeVersion(), + RequestParams.RESOURCE, + params.toByteArray(), true); + 
waitForAllReplicasState(req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName(), + req.getCore().getCoreDescriptor().getCoreContainer().getZkController(), + RequestParams.NAME, + latestVersion, 30); } } else { @@ -326,17 +409,20 @@ public class SolrConfigHandler extends RequestHandlerBase { } List errs = CommandOperation.captureErrors(ops); if (!errs.isEmpty()) { - log.info("Failed to run commands errors are {}", StrUtils.join(errs, ',')); + log.info("Failed to run commands. errors are {}", StrUtils.join(errs, ',')); resp.add(CommandOperation.ERR_MSGS, errs); return; } SolrResourceLoader loader = req.getCore().getResourceLoader(); if (loader instanceof ZkSolrResourceLoader) { - ZkController.persistConfigResourceToZooKeeper((ZkSolrResourceLoader) loader, overlay.getZnodeVersion(), + int latestVersion = ZkController.persistConfigResourceToZooKeeper((ZkSolrResourceLoader) loader, overlay.getZnodeVersion(), ConfigOverlay.RESOURCE_NAME, overlay.toByteArray(), true); - - log.info("Executed config commands successfully and persited to ZK {}", ops); + log.info("Executed config commands successfully and persisted to ZK {}", ops); + waitForAllReplicasState(req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName(), + req.getCore().getCoreDescriptor().getCoreContainer().getZkController(), + ConfigOverlay.NAME, + latestVersion, 30); } else { SolrResourceLoader.persistConfLocally(loader, ConfigOverlay.RESOURCE_NAME, overlay.toByteArray()); req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName()); @@ -519,7 +605,7 @@ public class SolrConfigHandler extends RequestHandlerBase { private static Set subPaths = new HashSet<>(Arrays.asList("/overlay", "/params", - "/query", "/jmx", "/requestDispatcher")); + "/query", "/jmx", "/requestDispatcher", "/znodeVersion")); static { for (SolrConfig.SolrPluginInfo solrPluginInfo : SolrConfig.plugins) @@ -556,4 +642,170 @@ public class SolrConfigHandler extends RequestHandlerBase { 
public static final String CREATE = "create"; private static Set cmdPrefixes = ImmutableSet.of(CREATE, UPDATE, "delete", "add"); + /** + * Block up to a specified maximum time until every active replica of a collection + * reports at least the expected ZooKeeper version for the given property. + */ + private static void waitForAllReplicasState(String collection, + ZkController zkController, + String prop, + int expectedVersion, + int maxWaitSecs) { + long startMs = System.currentTimeMillis(); + // get a list of active replica cores to query for the zk version of the property + List concurrentTasks = new ArrayList<>(); + + for (String coreUrl : getActiveReplicaCoreUrls(zkController, collection)) { + PerReplicaCallable e = new PerReplicaCallable(coreUrl, prop, expectedVersion, maxWaitSecs); + concurrentTasks.add(e); + } + if (concurrentTasks.isEmpty()) return; // nothing to wait for ... + + log.info(formatString("Waiting up to {0} secs for {1} replicas to set the property {2} to be of version {3} for collection {4}", + maxWaitSecs, concurrentTasks.size(), prop, expectedVersion, collection)); + + // use an executor service to invoke the zk version requests in parallel with a max wait time + int poolSize = Math.min(concurrentTasks.size(), 10); + ExecutorService parallelExecutor = + Executors.newFixedThreadPool(poolSize, new DefaultSolrThreadFactory("solrHandlerExecutor")); + try { + List> results = + parallelExecutor.invokeAll(concurrentTasks, maxWaitSecs, TimeUnit.SECONDS); + + // determine whether all replicas have the update + List failedList = null; // lazily init'd + for (int f = 0; f < results.size(); f++) { + Boolean success = false; + Future next = results.get(f); + if (next.isDone() && !next.isCancelled()) { + // looks to have finished, but need to check if it succeeded + try { + success = next.get(); + } catch (ExecutionException e) { + // shouldn't happen since we checked isCancelled + } + } + + if (!success) { + String coreUrl = 
concurrentTasks.get(f).coreUrl; + log.warn("Core " + coreUrl + "could not get the expected version " + expectedVersion); + if (failedList == null) failedList = new ArrayList<>(); + failedList.add(coreUrl); + } + } + + // if any tasks haven't completed within the specified timeout, it's an error + if (failedList != null) + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + formatString("{0} out of {1} the property {2} to be of version {3} within {4} seconds! Failed cores: {5}", + failedList.size(), concurrentTasks.size() + 1, prop, expectedVersion, maxWaitSecs, failedList)); + + } catch (InterruptedException ie) { + log.warn(formatString( + "Core was interrupted . trying to set the property {1} to version {2} to propagate to {3} replicas for collection {4}", + prop, expectedVersion, concurrentTasks.size(), collection)); + Thread.currentThread().interrupt(); + } finally { + if (!parallelExecutor.isShutdown()) + parallelExecutor.shutdownNow(); + } + + long diffMs = (System.currentTimeMillis() - startMs); + log.info(formatString( + "Took {0} secs to set the property {1} to be of version {2} for collection {3}", + Math.round(diffMs / 1000d), prop, expectedVersion, collection)); + } + + public static List getActiveReplicaCoreUrls(ZkController zkController, + String collection) { + List activeReplicaCoreUrls = new ArrayList<>(); + ClusterState clusterState = zkController.getZkStateReader().getClusterState(); + Set liveNodes = clusterState.getLiveNodes(); + Collection activeSlices = clusterState.getActiveSlices(collection); + if (activeSlices != null && activeSlices.size() > 0) { + for (Slice next : activeSlices) { + Map replicasMap = next.getReplicasMap(); + if (replicasMap != null) { + for (Map.Entry entry : replicasMap.entrySet()) { + Replica replica = entry.getValue(); + if (ZkStateReader.ACTIVE.equals(replica.getStr(ZkStateReader.STATE_PROP)) && + liveNodes.contains(replica.getNodeName())) { + activeReplicaCoreUrls.add(replica.getCoreUrl()); + } + } + } + 
} + } + return activeReplicaCoreUrls; + } + + private static class PerReplicaCallable extends SolrRequest implements Callable { + String coreUrl; + String prop; + int expectedZkVersion; + Number remoteVersion = null; + int maxWait; + + PerReplicaCallable(String coreUrl, String prop, int expectedZkVersion, int maxWait) { + super(METHOD.GET, "/config/" + ZNODEVER); + this.coreUrl = coreUrl; + this.expectedZkVersion = expectedZkVersion; + this.prop = prop; + this.maxWait = maxWait; + } + + @Override + public SolrParams getParams() { + return new ModifiableSolrParams() + .set(prop, expectedZkVersion) + .set(CommonParams.WT, CommonParams.JAVABIN); + } + + @Override + public Boolean call() throws Exception { + long startTime = System.currentTimeMillis(); + int attempts = 0; + try (HttpSolrClient solr = new HttpSolrClient(coreUrl)) { + // eventually, this loop will get killed by the ExecutorService's timeout + while (true) { + try { + long timeElapsed = (System.currentTimeMillis() - startTime) / 1000; + if (timeElapsed >= maxWait) { + return false; + } + log.info("Time elapsed : {} secs, maxWait {}", timeElapsed, maxWait); + Thread.sleep(100); + NamedList resp = solr.httpUriRequest(this).future.get(); + if (resp != null) { + Map m = (Map) resp.get(ZNODEVER); + if (m != null) { + remoteVersion = (Number) m.get(prop); + if (remoteVersion != null && remoteVersion.intValue() >= expectedZkVersion) break; + } + } + + attempts++; + log.info(formatString("Could not get expectedVersion {0} from {1} for prop {2} after {3} attempts", expectedZkVersion, coreUrl, prop, attempts)); + } catch (Exception e) { + if (e instanceof InterruptedException) { + break; // stop looping + } else { + log.warn("Failed to get /schema/zkversion from " + coreUrl + " due to: " + e); + } + } + } + } + return true; + } + + @Override + public Collection getContentStreams() throws IOException { + return null; + } + + @Override + protected SolrResponse createResponse(SolrClient client) { + return null; + } + 
} } diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LoggingHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LoggingHandler.java index daf12386364..7df1d67dd61 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LoggingHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LoggingHandler.java @@ -121,7 +121,7 @@ public class LoggingHandler extends RequestHandlerBase implements SolrCoreAware SimpleOrderedMap info = new SimpleOrderedMap<>(); if(time>0) { info.add("since", time); - info.add("found", found); + info.add("found", found.get()); } else { info.add("levels", watcher.getAllLevels()); // show for the first request diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java index 902a40e1eab..fefd3471718 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java @@ -199,9 +199,7 @@ public class HttpShardHandler extends ShardHandler { params.remove(CommonParams.WT); // use default (currently javabin) params.remove(CommonParams.VERSION); - // SolrRequest req = new QueryRequest(SolrRequest.METHOD.POST, "/select"); - // use generic request to avoid extra processing of queries - QueryRequest req = new QueryRequest(params); + QueryRequest req = makeQueryRequest(sreq, params, shard); req.setMethod(SolrRequest.METHOD.POST); // no need to set the response parser as binary is the default @@ -239,12 +237,29 @@ public class HttpShardHandler extends ShardHandler { ssr.elapsedTime = TimeUnit.MILLISECONDS.convert(System.nanoTime() - startTime, TimeUnit.NANOSECONDS); - return srsp; + return transfomResponse(sreq, srsp, shard); } }; pending.add( completionService.submit(task) ); } + + /** + * Subclasses could modify the request based on the shard + */ + protected QueryRequest makeQueryRequest(final ShardRequest sreq, 
ModifiableSolrParams params, String shard) + { + // use generic request to avoid extra processing of queries + return new QueryRequest(params); + } + + /** + * Subclasses could modify the Response based on the shard + */ + protected ShardResponse transfomResponse(final ShardRequest sreq, ShardResponse rsp, String shard) + { + return rsp; + } /** returns a ShardResponse of the last response correlated with a ShardRequest. This won't * return early if it runs into an error. diff --git a/solr/core/src/java/org/apache/solr/logging/log4j/Log4jWatcher.java b/solr/core/src/java/org/apache/solr/logging/log4j/Log4jWatcher.java index e484a97ffb3..e29ba5ba444 100644 --- a/solr/core/src/java/org/apache/solr/logging/log4j/Log4jWatcher.java +++ b/solr/core/src/java/org/apache/solr/logging/log4j/Log4jWatcher.java @@ -16,19 +16,6 @@ */ package org.apache.solr.logging.log4j; - -import com.google.common.base.Throwables; -import org.apache.log4j.AppenderSkeleton; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.apache.log4j.spi.LoggingEvent; -import org.apache.log4j.spi.ThrowableInformation; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.logging.CircularList; -import org.apache.solr.logging.ListenerConfig; -import org.apache.solr.logging.LogWatcher; -import org.apache.solr.logging.LoggerInfo; - import java.util.Arrays; import java.util.Collection; import java.util.Date; @@ -37,6 +24,20 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.spi.LoggingEvent; +import org.apache.log4j.spi.ThrowableInformation; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.logging.CircularList; +import org.apache.solr.logging.ListenerConfig; +import org.apache.solr.logging.LogWatcher; +import org.apache.solr.logging.LoggerInfo; 
+ +import com.google.common.base.Throwables; + public class Log4jWatcher extends LogWatcher { final String name; @@ -157,6 +158,12 @@ public class Log4jWatcher extends LogWatcher { if(t!=null) { doc.setField("trace", Throwables.getStackTraceAsString(t.getThrowable())); } + + // Will be null if not present + doc.setField("core", event.getMDC(ZkStateReader.CORE_NAME_PROP)); + doc.setField("collection", event.getMDC(ZkStateReader.COLLECTION_PROP)); + doc.setField("replica", event.getMDC(ZkStateReader.REPLICA_PROP)); + doc.setField("shard", event.getMDC(ZkStateReader.SHARD_ID_PROP)); return doc; } } \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index 75d550ed97c..2f0c372ff28 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -1494,7 +1494,7 @@ public class ExtendedDismaxQParser extends QParser { try { queryFields = DisMaxQParser.parseQueryFields(req.getSchema(), solrParams); // req.getSearcher() here causes searcher refcount imbalance } catch (SyntaxError e) { - throw new RuntimeException(); + throw new RuntimeException(e); } // Phrase slop array int pslop[] = new int[4]; diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java index d5f9cedcd08..5782ab850ce 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java @@ -99,7 +99,7 @@ public class DocumentExpressionDictionaryFactory extends DictionaryFactory { try { expression = JavascriptCompiler.compile(weightExpression); } catch (ParseException e) { - throw new RuntimeException(); + throw new 
RuntimeException(e); } SimpleBindings bindings = new SimpleBindings(); for (SortField sortField : sortFields) { diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java index 07ecb4334c2..14333475f07 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java @@ -55,7 +55,7 @@ public class FileDictionaryFactory extends DictionaryFactory { return new FileDictionary(new InputStreamReader( core.getResourceLoader().openResource(sourceLocation), StandardCharsets.UTF_8), fieldDelimiter); } catch (IOException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java index c37219b79d3..49edb6068fd 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java @@ -135,7 +135,7 @@ public class AnalyzingInfixLookupFactory extends LookupFactory { } }; } catch (IOException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } diff --git a/solr/core/src/java/org/apache/solr/util/SimplePostTool.java b/solr/core/src/java/org/apache/solr/util/SimplePostTool.java index 84ce8b2c543..73c5e7ea04c 100644 --- a/solr/core/src/java/org/apache/solr/util/SimplePostTool.java +++ b/solr/core/src/java/org/apache/solr/util/SimplePostTool.java @@ -518,7 +518,7 @@ public class SimplePostTool { Thread.sleep(delay * 1000); filesPosted++; } catch (InterruptedException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } return filesPosted; @@ -610,7 +610,7 @@ public class SimplePostTool { } 
catch (IOException e) { warn("Caught exception when trying to open connection to "+u+": "+e.getMessage()); } catch (InterruptedException e) { - throw new RuntimeException(); + throw new RuntimeException(e); } } if(!subStack.isEmpty()) { @@ -1209,7 +1209,7 @@ public class SimplePostTool { } catch (IOException e) { warn("IOException opening URL "+url+": "+e.getMessage()); } catch (Exception e) { - throw new RuntimeException(); + throw new RuntimeException(e); } return l; } diff --git a/solr/core/src/test/org/apache/solr/core/TestSolrConfigHandler.java b/solr/core/src/test/org/apache/solr/core/TestSolrConfigHandler.java index aa469215cca..5717ae9a3cf 100644 --- a/solr/core/src/test/org/apache/solr/core/TestSolrConfigHandler.java +++ b/solr/core/src/test/org/apache/solr/core/TestSolrConfigHandler.java @@ -21,7 +21,6 @@ package org.apache.solr.core; import java.io.File; import java.io.IOException; import java.io.StringReader; -import java.text.MessageFormat; import java.util.Arrays; import java.util.Collections; import java.util.List; diff --git a/solr/core/src/test/org/apache/solr/handler/TestReqParamsAPI.java b/solr/core/src/test/org/apache/solr/handler/TestReqParamsAPI.java index 2e6e038656f..45745b2923b 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestReqParamsAPI.java +++ b/solr/core/src/test/org/apache/solr/handler/TestReqParamsAPI.java @@ -41,7 +41,6 @@ import static org.apache.solr.handler.TestSolrConfigHandlerCloud.compareValues; * limitations under the License. 
*/ -@LuceneTestCase.BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-6924") public class TestReqParamsAPI extends AbstractFullDistribZkTestBase { static final Logger log = LoggerFactory.getLogger(TestSolrConfigHandlerCloud.class); private List restTestHarnesses = new ArrayList<>(); diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java b/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java index ecd4837d7b2..a51bdc077b7 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java @@ -21,6 +21,8 @@ import org.noggit.JSONUtil; import java.util.Map; +import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP; +import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; public class Replica extends ZkNodeProps { private final String name; @@ -35,6 +37,9 @@ public class Replica extends ZkNodeProps { public String getName() { return name; } + public String getCoreUrl() { + return ZkCoreNodeProps.getCoreUrl(getStr(BASE_URL_PROP), getStr(CORE_NAME_PROP)); + } /** The name of the node this replica resides on */ public String getNodeName() { diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java index 699059aa2fc..741a357a39f 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java @@ -224,5 +224,9 @@ public interface CommonParams { * When querying a node, prefer local node's cores for distributed queries. 
*/ public static final String PREFER_LOCAL_SHARDS = "preferLocalShards"; + + public static final String JAVABIN = "javabin"; + + public static final String JSON = "json"; } diff --git a/solr/webapp/web/js/scripts/logging.js b/solr/webapp/web/js/scripts/logging.js index 84faeacfd97..a632ab6b5e4 100644 --- a/solr/webapp/web/js/scripts/logging.js +++ b/solr/webapp/web/js/scripts/logging.js @@ -361,6 +361,7 @@ var load_logging_viewer = function() content += '' + "\n"; content += '' + format_time( doc.time ) + '' + "\n"; content += '' + doc.level.esc() + '' + "\n"; + content += '' + doc.core + '' + "\n"; content += '' + doc.logger + '' + "\n"; content += '' + doc.message.replace( /,/g, ',​' ).esc() + '' + "\n"; content += '' + "\n"; @@ -433,6 +434,7 @@ sammy.get '' + "\n" + 'Time (Local)' + "\n" + 'Level' + "\n" + + 'Core' + "\n" + 'Logger' + "\n" + 'Message' + "\n" + '' + "\n" +