mirror of https://github.com/apache/lucene.git
LUCENE-6271: merge trunk changes up to r1670529
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene6271@1670533 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
41ba046a93
|
@ -32,6 +32,26 @@ API Changes
|
|||
* LUCENE-6067: Accountable.getChildResources has a default
|
||||
implementation returning the empty list. (Robert Muir)
|
||||
|
||||
======================= Lucene 5.2.0 =======================
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-6308: Span queries now share document conjunction/intersection
|
||||
code with boolean queries, and use two-phased iterators for
|
||||
faster intersection by avoiding loading positions in certain cases.
|
||||
(Paul Elschot, Robert Muir via Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
|
||||
one of the queries is MatchAllDocsQuery and just invokes the much
|
||||
faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
|
||||
Grand, Mike McCandless)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.
|
||||
(Varun Thacker, Adrien Grand, Mike McCandless)
|
||||
======================= Lucene 5.1.0 =======================
|
||||
|
||||
New Features
|
||||
|
|
|
@ -32,8 +32,8 @@ import java.util.Iterator;
|
|||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
@ -47,6 +47,7 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
|
|||
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
|
||||
import org.apache.lucene.index.FieldInfos.FieldNumbers;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -1315,6 +1316,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
*/
|
||||
public void deleteDocuments(Query... queries) throws IOException {
|
||||
ensureOpen();
|
||||
|
||||
// LUCENE-6379: Specialize MatchAllDocsQuery
|
||||
for(Query query : queries) {
|
||||
if (query.getClass() == MatchAllDocsQuery.class) {
|
||||
deleteAll();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
if (docWriter.deleteQueries(queries)) {
|
||||
processEvents(true, false);
|
||||
|
|
|
@ -23,8 +23,14 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
|
||||
class ConjunctionDISI extends DocIdSetIterator {
|
||||
/** A conjunction of DocIdSetIterators.
|
||||
* This iterates over the doc ids that are present in each given DocIdSetIterator.
|
||||
* <br>Public only for use in {@link org.apache.lucene.search.spans}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class ConjunctionDISI extends DocIdSetIterator {
|
||||
|
||||
/** Create a conjunction over the provided iterators, taking advantage of
|
||||
* {@link TwoPhaseIterator}. */
|
||||
|
@ -32,18 +38,16 @@ class ConjunctionDISI extends DocIdSetIterator {
|
|||
final List<DocIdSetIterator> allIterators = new ArrayList<>();
|
||||
final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
|
||||
for (DocIdSetIterator iterator : iterators) {
|
||||
TwoPhaseIterator twoPhaseIterator = null;
|
||||
if (iterator instanceof Scorer) {
|
||||
// if we have a scorer, check if it supports two-phase iteration
|
||||
TwoPhaseIterator twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
|
||||
if (twoPhaseIterator != null) {
|
||||
// Note:
|
||||
allIterators.add(twoPhaseIterator.approximation());
|
||||
twoPhaseIterators.add(twoPhaseIterator);
|
||||
} else {
|
||||
allIterators.add(iterator);
|
||||
}
|
||||
} else {
|
||||
// no approximation support, use the iterator as-is
|
||||
twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
|
||||
} else if (iterator instanceof Spans) {
|
||||
twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
|
||||
}
|
||||
if (twoPhaseIterator != null) {
|
||||
allIterators.add(twoPhaseIterator.approximation());
|
||||
twoPhaseIterators.add(twoPhaseIterator);
|
||||
} else { // no approximation support, use the iterator as-is
|
||||
allIterators.add(iterator);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
* A query that matches all documents.
|
||||
*
|
||||
*/
|
||||
public class MatchAllDocsQuery extends Query {
|
||||
public final class MatchAllDocsQuery extends Query {
|
||||
|
||||
private class MatchAllScorer extends Scorer {
|
||||
final float score;
|
||||
|
@ -88,7 +88,7 @@ public class MatchAllDocsQuery extends Query {
|
|||
private float queryWeight;
|
||||
private float queryNorm;
|
||||
|
||||
public MatchAllDocsWeight(IndexSearcher searcher) {
|
||||
public MatchAllDocsWeight() {
|
||||
super(MatchAllDocsQuery.this);
|
||||
}
|
||||
|
||||
|
@ -130,7 +130,7 @@ public class MatchAllDocsQuery extends Query {
|
|||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
|
||||
return new MatchAllDocsWeight(searcher);
|
||||
return new MatchAllDocsWeight();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -26,7 +26,6 @@ import org.apache.lucene.search.ComplexExplanation;
|
|||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
@ -71,7 +70,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new PayloadNearSpanWeight(this, searcher);
|
||||
}
|
||||
|
||||
|
@ -113,7 +112,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = super.hashCode();
|
||||
int result = super.hashCode() ^ getClass().hashCode();
|
||||
result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode());
|
||||
result = prime * result + ((function == null) ? 0 : function.hashCode());
|
||||
return result;
|
||||
|
@ -149,8 +148,10 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
|
||||
similarity, similarity.simScorer(stats, context));
|
||||
Spans spans = query.getSpans(context, acceptDocs, termContexts);
|
||||
return (spans == null)
|
||||
? null
|
||||
: new PayloadNearSpanScorer(spans, this, similarity, similarity.simScorer(stats, context));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -188,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
protected float payloadScore;
|
||||
private int payloadsSeen;
|
||||
|
||||
protected PayloadNearSpanScorer(Spans spans, Weight weight,
|
||||
protected PayloadNearSpanScorer(Spans spans, SpanWeight weight,
|
||||
Similarity similarity, Similarity.SimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
this.spans = spans;
|
||||
|
@ -200,13 +201,13 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
if (subSpans[i] instanceof NearSpansOrdered) {
|
||||
if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
|
||||
processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(),
|
||||
subSpans[i].start(), subSpans[i].end());
|
||||
subSpans[i].startPosition(), subSpans[i].endPosition());
|
||||
}
|
||||
getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
|
||||
} else if (subSpans[i] instanceof NearSpansUnordered) {
|
||||
if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
|
||||
processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(),
|
||||
subSpans[i].start(), subSpans[i].end());
|
||||
subSpans[i].startPosition(), subSpans[i].endPosition());
|
||||
}
|
||||
getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
|
||||
}
|
||||
|
@ -233,7 +234,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
scratch.length = thePayload.length;
|
||||
payloadScore = function.currentScore(doc, fieldName, start, end,
|
||||
payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
|
||||
spans.start(), spans.end(), scratch));
|
||||
spans.startPosition(), spans.endPosition(), scratch));
|
||||
++payloadsSeen;
|
||||
}
|
||||
}
|
||||
|
@ -241,22 +242,20 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
//
|
||||
@Override
|
||||
protected boolean setFreqCurrentDoc() throws IOException {
|
||||
if (!more) {
|
||||
return false;
|
||||
}
|
||||
doc = spans.doc();
|
||||
freq = 0.0f;
|
||||
payloadScore = 0;
|
||||
payloadsSeen = 0;
|
||||
do {
|
||||
int matchLength = spans.end() - spans.start();
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
Spans[] spansArr = new Spans[1];
|
||||
spansArr[0] = spans;
|
||||
getPayloads(spansArr);
|
||||
more = spans.next();
|
||||
} while (more && (doc == spans.doc()));
|
||||
return true;
|
||||
freq = 0.0f;
|
||||
payloadScore = 0;
|
||||
payloadsSeen = 0;
|
||||
int startPos = spans.nextStartPosition();
|
||||
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
|
||||
do {
|
||||
int matchLength = spans.endPosition() - startPos;
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
Spans[] spansArr = new Spans[1];
|
||||
spansArr[0] = spans;
|
||||
getPayloads(spansArr);
|
||||
startPos = spans.nextStartPosition();
|
||||
} while (startPos != Spans.NO_MORE_POSITIONS);
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -169,7 +169,7 @@ public class PayloadSpanUtil {
|
|||
final boolean inorder = (slop == 0);
|
||||
|
||||
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps,
|
||||
inorder);
|
||||
inorder);
|
||||
sp.setBoost(query.getBoost());
|
||||
getPayloads(payloads, sp);
|
||||
}
|
||||
|
@ -186,11 +186,15 @@ public class PayloadSpanUtil {
|
|||
}
|
||||
for (LeafReaderContext leafReaderContext : context.leaves()) {
|
||||
final Spans spans = query.getSpans(leafReaderContext, leafReaderContext.reader().getLiveDocs(), termContexts);
|
||||
while (spans.next() == true) {
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
for (byte [] bytes : payload) {
|
||||
payloads.add(bytes);
|
||||
if (spans != null) {
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
for (byte [] bytes : payload) {
|
||||
payloads.add(bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
|
@ -26,10 +27,10 @@ import org.apache.lucene.search.ComplexExplanation;
|
|||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanScorer;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
|
@ -60,14 +61,14 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
}
|
||||
|
||||
public PayloadTermQuery(Term term, PayloadFunction function,
|
||||
boolean includeSpanScore) {
|
||||
boolean includeSpanScore) {
|
||||
super(term);
|
||||
this.function = function;
|
||||
this.function = Objects.requireNonNull(function);
|
||||
this.includeSpanScore = includeSpanScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new PayloadTermWeight(this, searcher);
|
||||
}
|
||||
|
||||
|
@ -79,9 +80,11 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
|
||||
this, similarity.simScorer(stats, context));
|
||||
public PayloadTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts);
|
||||
return (spans == null)
|
||||
? null
|
||||
: new PayloadTermSpanScorer(spans, this, similarity.simScorer(stats, context));
|
||||
}
|
||||
|
||||
protected class PayloadTermSpanScorer extends SpanScorer {
|
||||
|
@ -90,45 +93,42 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
protected int payloadsSeen;
|
||||
private final TermSpans termSpans;
|
||||
|
||||
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
|
||||
public PayloadTermSpanScorer(TermSpans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
termSpans = spans;
|
||||
termSpans = spans; // CHECKME: generics to use SpansScorer.spans as TermSpans.
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean setFreqCurrentDoc() throws IOException {
|
||||
if (!more) {
|
||||
return false;
|
||||
}
|
||||
doc = spans.doc();
|
||||
freq = 0.0f;
|
||||
numMatches = 0;
|
||||
payloadScore = 0;
|
||||
payloadsSeen = 0;
|
||||
while (more && doc == spans.doc()) {
|
||||
int matchLength = spans.end() - spans.start();
|
||||
int startPos = spans.nextStartPosition();
|
||||
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
|
||||
do {
|
||||
int matchLength = spans.endPosition() - startPos;
|
||||
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
numMatches++;
|
||||
processPayload(similarity);
|
||||
|
||||
more = spans.next();// this moves positions to the next match in this
|
||||
// document
|
||||
}
|
||||
return more || (freq != 0);
|
||||
startPos = spans.nextStartPosition();
|
||||
} while (startPos != Spans.NO_MORE_POSITIONS);
|
||||
return freq != 0;
|
||||
}
|
||||
|
||||
protected void processPayload(Similarity similarity) throws IOException {
|
||||
if (termSpans.isPayloadAvailable()) {
|
||||
if (spans.isPayloadAvailable()) {
|
||||
final PostingsEnum postings = termSpans.getPostings();
|
||||
payload = postings.getPayload();
|
||||
if (payload != null) {
|
||||
payloadScore = function.currentScore(doc, term.field(),
|
||||
spans.start(), spans.end(), payloadsSeen, payloadScore,
|
||||
docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
|
||||
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
|
||||
docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
|
||||
} else {
|
||||
payloadScore = function.currentScore(doc, term.field(),
|
||||
spans.start(), spans.end(), payloadsSeen, payloadScore, 1F);
|
||||
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
|
||||
}
|
||||
payloadsSeen++;
|
||||
|
||||
|
@ -176,7 +176,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs());
|
||||
PayloadTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
|
||||
if (scorer != null) {
|
||||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
|
@ -220,7 +220,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = super.hashCode();
|
||||
result = prime * result + ((function == null) ? 0 : function.hashCode());
|
||||
result = prime * result + function.hashCode();
|
||||
result = prime * result + (includeSpanScore ? 1231 : 1237);
|
||||
return result;
|
||||
}
|
||||
|
@ -234,14 +234,9 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
PayloadTermQuery other = (PayloadTermQuery) obj;
|
||||
if (function == null) {
|
||||
if (other.function != null)
|
||||
return false;
|
||||
} else if (!function.equals(other.function))
|
||||
return false;
|
||||
if (includeSpanScore != other.includeSpanScore)
|
||||
return false;
|
||||
return true;
|
||||
return function.equals(other.function);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ public class FieldMaskingSpanQuery extends SpanQuery {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return maskedQuery.createWeight(searcher, needsScores);
|
||||
}
|
||||
|
||||
|
|
|
@ -19,10 +19,13 @@ package org.apache.lucene.search.spans;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
|
||||
/**
|
||||
* A {@link Spans} implementation which allows wrapping another spans instance
|
||||
* and override some selected methods.
|
||||
* A {@link Spans} implementation wrapping another spans instance,
|
||||
* allowing to override selected methods in a subclass.
|
||||
*/
|
||||
public class FilterSpans extends Spans {
|
||||
|
||||
|
@ -31,32 +34,37 @@ public class FilterSpans extends Spans {
|
|||
|
||||
/** Wrap the given {@link Spans}. */
|
||||
public FilterSpans(Spans in) {
|
||||
this.in = in;
|
||||
this.in = Objects.requireNonNull(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
return in.next();
|
||||
public int nextDoc() throws IOException {
|
||||
return in.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
return in.skipTo(target);
|
||||
public int advance(int target) throws IOException {
|
||||
return in.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
return in.doc();
|
||||
public int docID() {
|
||||
return in.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return in.start();
|
||||
public int nextStartPosition() throws IOException {
|
||||
return in.nextStartPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return in.end();
|
||||
public int startPosition() {
|
||||
return in.startPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return in.endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -79,4 +87,8 @@ public class FilterSpans extends Spans {
|
|||
return "Filter(" + in.toString() + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
return in.asTwoPhaseIterator();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.ConjunctionDISI;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Common super class for un/ordered Spans
|
||||
*/
|
||||
abstract class NearSpans extends Spans {
|
||||
SpanNearQuery query;
|
||||
int allowedSlop;
|
||||
|
||||
List<Spans> subSpans; // in query order
|
||||
DocIdSetIterator conjunction; // use to move to next doc with all clauses
|
||||
boolean atFirstInCurrentDoc;
|
||||
boolean oneExhaustedInCurrentDoc; // no more results possbile in current doc
|
||||
|
||||
NearSpans(SpanNearQuery query, List<Spans> subSpans)
|
||||
throws IOException {
|
||||
this.query = Objects.requireNonNull(query);
|
||||
this.allowedSlop = query.getSlop();
|
||||
if (subSpans.size() < 2) {
|
||||
throw new IllegalArgumentException("Less than 2 subSpans: " + query);
|
||||
}
|
||||
this.subSpans = Objects.requireNonNull(subSpans); // in query order
|
||||
this.conjunction = ConjunctionDISI.intersect(subSpans);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return conjunction.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return conjunction.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return (conjunction.nextDoc() == NO_MORE_DOCS)
|
||||
? NO_MORE_DOCS
|
||||
: toMatchDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return (conjunction.advance(target) == NO_MORE_DOCS)
|
||||
? NO_MORE_DOCS
|
||||
: toMatchDoc();
|
||||
}
|
||||
|
||||
abstract int toMatchDoc() throws IOException;
|
||||
|
||||
abstract boolean twoPhaseCurrentDocMatches() throws IOException;
|
||||
|
||||
/**
|
||||
* Return a {@link TwoPhaseIterator} view of this {@link NearSpans}.
|
||||
*/
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
TwoPhaseIterator res = new TwoPhaseIterator(conjunction) {
|
||||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
return twoPhaseCurrentDocMatches();
|
||||
}
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
private Spans[] subSpansArray = null; // init only when needed.
|
||||
|
||||
public Spans[] getSubSpans() {
|
||||
if (subSpansArray == null) {
|
||||
subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
|
||||
}
|
||||
return subSpansArray;
|
||||
}
|
||||
|
||||
}
|
|
@ -17,24 +17,18 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
|
||||
* where the subspans do not overlap and have a maximum slop between them.
|
||||
* where the subspans do not overlap and have a maximum slop between them,
|
||||
* and that does not need to collect payloads.
|
||||
* To also collect payloads, see {@link NearSpansPayloadOrdered}.
|
||||
* <p>
|
||||
* The formed spans only contains minimum slop matches.<br>
|
||||
* The matching slop is computed from the distance(s) between
|
||||
|
@ -55,278 +49,151 @@ import java.util.Set;
|
|||
* Expert:
|
||||
* Only public for subclassing. Most implementations should not need this class
|
||||
*/
|
||||
public class NearSpansOrdered extends Spans {
|
||||
private final int allowedSlop;
|
||||
private boolean firstTime = true;
|
||||
private boolean more = false;
|
||||
public class NearSpansOrdered extends NearSpans {
|
||||
|
||||
/** The spans in the same order as the SpanNearQuery */
|
||||
private final Spans[] subSpans;
|
||||
protected int matchDoc = -1;
|
||||
protected int matchStart = -1;
|
||||
protected int matchEnd = -1;
|
||||
|
||||
/** Indicates that all subSpans have same doc() */
|
||||
private boolean inSameDoc = false;
|
||||
|
||||
private int matchDoc = -1;
|
||||
private int matchStart = -1;
|
||||
private int matchEnd = -1;
|
||||
private List<byte[]> matchPayload;
|
||||
|
||||
private final Spans[] subSpansByDoc;
|
||||
// Even though the array is probably almost sorted, InPlaceMergeSorter will likely
|
||||
// perform better since it has a lower overhead than TimSorter for small arrays
|
||||
private final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
ArrayUtil.swap(subSpansByDoc, i, j);
|
||||
}
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return subSpansByDoc[i].doc() - subSpansByDoc[j].doc();
|
||||
}
|
||||
};
|
||||
|
||||
private SpanNearQuery query;
|
||||
private boolean collectPayloads = true;
|
||||
|
||||
public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
this(spanNearQuery, context, acceptDocs, termContexts, true);
|
||||
}
|
||||
|
||||
public NearSpansOrdered(SpanNearQuery spanNearQuery, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
|
||||
throws IOException {
|
||||
if (spanNearQuery.getClauses().length < 2) {
|
||||
throw new IllegalArgumentException("Less than 2 clauses: "
|
||||
+ spanNearQuery);
|
||||
}
|
||||
this.collectPayloads = collectPayloads;
|
||||
allowedSlop = spanNearQuery.getSlop();
|
||||
SpanQuery[] clauses = spanNearQuery.getClauses();
|
||||
subSpans = new Spans[clauses.length];
|
||||
matchPayload = new LinkedList<>();
|
||||
subSpansByDoc = new Spans[clauses.length];
|
||||
for (int i = 0; i < clauses.length; i++) {
|
||||
subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
|
||||
subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
|
||||
}
|
||||
query = spanNearQuery; // kept for toString() only.
|
||||
}
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public int doc() { return matchDoc; }
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public int start() { return matchStart; }
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public int end() { return matchEnd; }
|
||||
|
||||
public Spans[] getSubSpans() {
|
||||
return subSpans;
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
// TODO: Would be nice to be able to lazy load payloads
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return matchPayload;
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
return matchPayload.isEmpty() == false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
long minCost = Long.MAX_VALUE;
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
minCost = Math.min(minCost, subSpans[i].cost());
|
||||
}
|
||||
return minCost;
|
||||
}
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (firstTime) {
|
||||
firstTime = false;
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
if (! subSpans[i].next()) {
|
||||
more = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
more = true;
|
||||
}
|
||||
if(collectPayloads) {
|
||||
matchPayload.clear();
|
||||
}
|
||||
return advanceAfterOrdered();
|
||||
}
|
||||
|
||||
// inherit javadocs
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (firstTime) {
|
||||
firstTime = false;
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
if (! subSpans[i].skipTo(target)) {
|
||||
more = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
more = true;
|
||||
} else if (more && (subSpans[0].doc() < target)) {
|
||||
if (subSpans[0].skipTo(target)) {
|
||||
inSameDoc = false;
|
||||
} else {
|
||||
more = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if(collectPayloads) {
|
||||
matchPayload.clear();
|
||||
}
|
||||
return advanceAfterOrdered();
|
||||
public NearSpansOrdered(SpanNearQuery query, List<Spans> subSpans) throws IOException {
|
||||
super(query, subSpans);
|
||||
this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1
|
||||
}
|
||||
|
||||
/** Advances the subSpans to just after an ordered match with a minimum slop
|
||||
* that is smaller than the slop allowed by the SpanNearQuery.
|
||||
* @return true iff there is such a match.
|
||||
*/
|
||||
private boolean advanceAfterOrdered() throws IOException {
|
||||
while (more && (inSameDoc || toSameDoc())) {
|
||||
if (stretchToOrder() && shrinkToAfterShortestMatch()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false; // no more matches
|
||||
}
|
||||
|
||||
|
||||
/** Advance the subSpans to the same document */
|
||||
private boolean toSameDoc() throws IOException {
|
||||
sorter.sort(0, subSpansByDoc.length);
|
||||
int firstIndex = 0;
|
||||
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
|
||||
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
|
||||
if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
|
||||
more = false;
|
||||
inSameDoc = false;
|
||||
return false;
|
||||
}
|
||||
maxDoc = subSpansByDoc[firstIndex].doc();
|
||||
if (++firstIndex == subSpansByDoc.length) {
|
||||
firstIndex = 0;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < subSpansByDoc.length; i++) {
|
||||
assert (subSpansByDoc[i].doc() == maxDoc)
|
||||
: " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
|
||||
+ "\n at doc " + subSpansByDoc[i].doc()
|
||||
+ ", but should be at " + maxDoc;
|
||||
}
|
||||
inSameDoc = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Check whether two Spans in the same document are ordered and not overlapping.
|
||||
* @return false iff spans2's start position is smaller than spans1's end position
|
||||
*/
|
||||
static final boolean docSpansOrderedNonOverlap(Spans spans1, Spans spans2) {
|
||||
assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
|
||||
assert spans1.start() < spans1.end();
|
||||
assert spans2.start() < spans2.end();
|
||||
return spans1.end() <= spans2.start();
|
||||
}
|
||||
|
||||
/** Like {@link #docSpansOrderedNonOverlap(Spans,Spans)}, but use the spans
|
||||
* starts and ends as parameters.
|
||||
*/
|
||||
private static final boolean docSpansOrderedNonOverlap(int start1, int end1, int start2, int end2) {
|
||||
assert start1 < end1;
|
||||
assert start2 < end2;
|
||||
return end1 <= start2;
|
||||
}
|
||||
|
||||
/** Order the subSpans within the same document by advancing all later spans
|
||||
* after the previous one.
|
||||
*/
|
||||
private boolean stretchToOrder() throws IOException {
|
||||
matchDoc = subSpans[0].doc();
|
||||
for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
|
||||
while (! docSpansOrderedNonOverlap(subSpans[i-1], subSpans[i])) {
|
||||
if (! subSpans[i].next()) {
|
||||
inSameDoc = false;
|
||||
more = false;
|
||||
break;
|
||||
} else if (matchDoc != subSpans[i].doc()) {
|
||||
inSameDoc = false;
|
||||
break;
|
||||
@Override
|
||||
int toMatchDoc() throws IOException {
|
||||
subSpansToFirstStartPosition();
|
||||
while (true) {
|
||||
if (! stretchToOrder()) {
|
||||
if (conjunction.nextDoc() == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
subSpansToFirstStartPosition();
|
||||
} else {
|
||||
if (shrinkToAfterShortestMatch()) {
|
||||
atFirstInCurrentDoc = true;
|
||||
return conjunction.docID();
|
||||
}
|
||||
// not a match, after shortest ordered spans, not at beginning of doc.
|
||||
if (oneExhaustedInCurrentDoc) {
|
||||
if (conjunction.nextDoc() == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
subSpansToFirstStartPosition();
|
||||
}
|
||||
}
|
||||
}
|
||||
return inSameDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||
subSpansToFirstStartPosition();
|
||||
while (true) {
|
||||
if (! stretchToOrder()) {
|
||||
return false;
|
||||
}
|
||||
if (shrinkToAfterShortestMatch()) {
|
||||
atFirstInCurrentDoc = true;
|
||||
return true;
|
||||
}
|
||||
// not a match, after shortest ordered spans
|
||||
if (oneExhaustedInCurrentDoc) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
if (atFirstInCurrentDoc) {
|
||||
atFirstInCurrentDoc = false;
|
||||
return matchStart;
|
||||
}
|
||||
while (true) {
|
||||
if (oneExhaustedInCurrentDoc) {
|
||||
matchStart = NO_MORE_POSITIONS;
|
||||
matchEnd = NO_MORE_POSITIONS;
|
||||
return NO_MORE_POSITIONS;
|
||||
}
|
||||
if (! stretchToOrder()) {
|
||||
matchStart = NO_MORE_POSITIONS;
|
||||
matchEnd = NO_MORE_POSITIONS;
|
||||
return NO_MORE_POSITIONS;
|
||||
}
|
||||
if (shrinkToAfterShortestMatch()) { // may also leave oneExhaustedInCurrentDoc
|
||||
return matchStart;
|
||||
}
|
||||
// after shortest ordered spans, or oneExhaustedInCurrentDoc
|
||||
}
|
||||
}
|
||||
|
||||
private void subSpansToFirstStartPosition() throws IOException {
|
||||
for (Spans spans : subSpans) {
|
||||
assert spans.startPosition() == -1 : "spans="+spans;
|
||||
spans.nextStartPosition();
|
||||
assert spans.startPosition() != NO_MORE_POSITIONS;
|
||||
}
|
||||
oneExhaustedInCurrentDoc = false;
|
||||
}
|
||||
|
||||
/** Order the subSpans within the same document by using nextStartPosition on all subSpans
|
||||
* after the first as little as necessary.
|
||||
* Return true when the subSpans could be ordered in this way,
|
||||
* otherwise at least one is exhausted in the current doc.
|
||||
*/
|
||||
private boolean stretchToOrder() throws IOException {
|
||||
Spans prevSpans = subSpans.get(0);
|
||||
assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
|
||||
assert prevSpans.endPosition() != NO_MORE_POSITIONS;
|
||||
for (int i = 1; i < subSpans.size(); i++) {
|
||||
Spans spans = subSpans.get(i);
|
||||
assert spans.startPosition() != NO_MORE_POSITIONS;
|
||||
assert spans.endPosition() != NO_MORE_POSITIONS;
|
||||
|
||||
while (prevSpans.endPosition() > spans.startPosition()) { // while overlapping spans
|
||||
if (spans.nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
prevSpans = spans;
|
||||
}
|
||||
return true; // all subSpans ordered and non overlapping
|
||||
}
|
||||
|
||||
/** The subSpans are ordered in the same doc, so there is a possible match.
|
||||
* Compute the slop while making the match as short as possible by advancing
|
||||
* all subSpans except the last one in reverse order.
|
||||
* Compute the slop while making the match as short as possible by using nextStartPosition
|
||||
* on all subSpans, except the last one, in reverse order.
|
||||
*/
|
||||
private boolean shrinkToAfterShortestMatch() throws IOException {
|
||||
matchStart = subSpans[subSpans.length - 1].start();
|
||||
matchEnd = subSpans[subSpans.length - 1].end();
|
||||
Set<byte[]> possibleMatchPayloads = new HashSet<>();
|
||||
if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
|
||||
possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
|
||||
}
|
||||
|
||||
Collection<byte[]> possiblePayload = null;
|
||||
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
||||
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
|
||||
matchStart = lastSubSpans.startPosition();
|
||||
matchEnd = lastSubSpans.endPosition();
|
||||
|
||||
int matchSlop = 0;
|
||||
int lastStart = matchStart;
|
||||
int lastEnd = matchEnd;
|
||||
for (int i = subSpans.length - 2; i >= 0; i--) {
|
||||
Spans prevSpans = subSpans[i];
|
||||
if (collectPayloads && prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
for (int i = subSpans.size() - 2; i >= 0; i--) {
|
||||
Spans prevSpans = subSpans.get(i);
|
||||
|
||||
int prevStart = prevSpans.start();
|
||||
int prevEnd = prevSpans.end();
|
||||
while (true) { // Advance prevSpans until after (lastStart, lastEnd)
|
||||
if (! prevSpans.next()) {
|
||||
inSameDoc = false;
|
||||
more = false;
|
||||
break; // Check remaining subSpans for final match.
|
||||
} else if (matchDoc != prevSpans.doc()) {
|
||||
inSameDoc = false; // The last subSpans is not advanced here.
|
||||
break; // Check remaining subSpans for last match in this document.
|
||||
} else {
|
||||
int ppStart = prevSpans.start();
|
||||
int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
|
||||
if (! docSpansOrderedNonOverlap(ppStart, ppEnd, lastStart, lastEnd)) {
|
||||
break; // Check remaining subSpans.
|
||||
} else { // prevSpans still before (lastStart, lastEnd)
|
||||
prevStart = ppStart;
|
||||
prevEnd = ppEnd;
|
||||
if (collectPayloads && prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
}
|
||||
int prevStart = prevSpans.startPosition();
|
||||
int prevEnd = prevSpans.endPosition();
|
||||
while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
|
||||
if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
oneExhaustedInCurrentDoc = true;
|
||||
break; // Check remaining subSpans for match.
|
||||
}
|
||||
}
|
||||
|
||||
if (collectPayloads && possiblePayload != null) {
|
||||
possibleMatchPayloads.addAll(possiblePayload);
|
||||
int ppStart = prevSpans.startPosition();
|
||||
int ppEnd = prevSpans.endPosition();
|
||||
if (ppEnd > lastStart) { // if overlapping spans
|
||||
break; // Check remaining subSpans.
|
||||
}
|
||||
// prevSpans still before (lastStart, lastEnd)
|
||||
prevStart = ppStart;
|
||||
prevEnd = ppEnd;
|
||||
}
|
||||
|
||||
assert prevStart <= matchStart;
|
||||
|
@ -335,7 +202,7 @@ public class NearSpansOrdered extends Spans {
|
|||
}
|
||||
|
||||
/* Do not break on (matchSlop > allowedSlop) here to make sure
|
||||
* that subSpans[0] is advanced after the match, if any.
|
||||
* that on return the first subSpans has nextStartPosition called.
|
||||
*/
|
||||
matchStart = prevStart;
|
||||
lastStart = prevStart;
|
||||
|
@ -344,17 +211,34 @@ public class NearSpansOrdered extends Spans {
|
|||
|
||||
boolean match = matchSlop <= allowedSlop;
|
||||
|
||||
if(collectPayloads && match && possibleMatchPayloads.size() > 0) {
|
||||
matchPayload.addAll(possibleMatchPayloads);
|
||||
}
|
||||
|
||||
return match; // ordered and allowed slop
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return atFirstInCurrentDoc ? -1 : matchStart;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return atFirstInCurrentDoc ? -1 : matchEnd;
|
||||
}
|
||||
|
||||
/**
 * Always throws an UnsupportedOperationException: this class does not
 * provide payloads. The exception message names NearSpansPayloadOrdered
 * as the class to use instead.
 */
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
|
||||
}
|
||||
|
||||
/**
 * Always throws an UnsupportedOperationException instead of returning a value:
 * this class does not provide payloads. The exception message names
 * NearSpansPayloadOrdered as the class to use instead.
 */
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getName() + "("+query.toString()+")@"+
|
||||
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
|
||||
return "NearSpansOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
|
||||
/** A {@link NearSpansOrdered} that allows collecting payloads.
|
||||
* Expert:
|
||||
* Only public for subclassing. Most implementations should not need this class
|
||||
*/
|
||||
public class NearSpansPayloadOrdered extends NearSpansOrdered {
|
||||
|
||||
private List<byte[]> matchPayload;
|
||||
private Set<byte[]> possibleMatchPayloads;
|
||||
|
||||
public NearSpansPayloadOrdered(SpanNearQuery query, List<Spans> subSpans)
|
||||
throws IOException {
|
||||
super(query, subSpans);
|
||||
this.matchPayload = new LinkedList<>();
|
||||
this.possibleMatchPayloads = new HashSet<>();
|
||||
}
|
||||
|
||||
/** The subSpans are ordered in the same doc, so there is a possible match.
|
||||
* Compute the slop while making the match as short as possible by using nextStartPosition
|
||||
* on all subSpans, except the last one, in reverse order.
|
||||
* Also collect the payloads.
|
||||
*/
|
||||
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
||||
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
|
||||
matchStart = lastSubSpans.startPosition();
|
||||
matchEnd = lastSubSpans.endPosition();
|
||||
|
||||
matchPayload.clear();
|
||||
possibleMatchPayloads.clear();
|
||||
|
||||
if (lastSubSpans.isPayloadAvailable()) {
|
||||
possibleMatchPayloads.addAll(lastSubSpans.getPayload());
|
||||
}
|
||||
|
||||
Collection<byte[]> possiblePayload = null;
|
||||
|
||||
int matchSlop = 0;
|
||||
int lastStart = matchStart;
|
||||
int lastEnd = matchEnd;
|
||||
for (int i = subSpans.size() - 2; i >= 0; i--) {
|
||||
Spans prevSpans = subSpans.get(i);
|
||||
|
||||
if (prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
|
||||
int prevStart = prevSpans.startPosition();
|
||||
int prevEnd = prevSpans.endPosition();
|
||||
while (true) { // prevSpans nextStartPosition until after (lastStart, lastEnd)
|
||||
if (prevSpans.nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
oneExhaustedInCurrentDoc = true;
|
||||
break; // Check remaining subSpans for match.
|
||||
}
|
||||
int ppStart = prevSpans.startPosition();
|
||||
int ppEnd = prevSpans.endPosition();
|
||||
if (ppEnd > lastStart) { // if overlapping spans
|
||||
break; // Check remaining subSpans.
|
||||
}
|
||||
// prevSpans still before (lastStart, lastEnd)
|
||||
prevStart = ppStart;
|
||||
prevEnd = ppEnd;
|
||||
if (prevSpans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = prevSpans.getPayload();
|
||||
if (possiblePayload == null) {
|
||||
possiblePayload = new ArrayList<>(payload.size());
|
||||
} else {
|
||||
possiblePayload.clear();
|
||||
}
|
||||
possiblePayload.addAll(payload);
|
||||
}
|
||||
}
|
||||
|
||||
if (possiblePayload != null) {
|
||||
possibleMatchPayloads.addAll(possiblePayload);
|
||||
}
|
||||
|
||||
assert prevStart <= matchStart;
|
||||
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
|
||||
matchSlop += (matchStart - prevEnd);
|
||||
}
|
||||
|
||||
/* Do not break on (matchSlop > allowedSlop) here to make sure
|
||||
* that on return the first subSpans has nextStartPosition called.
|
||||
*/
|
||||
matchStart = prevStart;
|
||||
lastStart = prevStart;
|
||||
lastEnd = prevEnd;
|
||||
}
|
||||
|
||||
boolean match = matchSlop <= allowedSlop;
|
||||
|
||||
if (match && possibleMatchPayloads.size() > 0) {
|
||||
matchPayload.addAll(possibleMatchPayloads);
|
||||
}
|
||||
|
||||
return match; // ordered and allowed slop
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
// TODO: Would be nice to be able to lazy load payloads
|
||||
/** Return payloads when available. */
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return matchPayload;
|
||||
}
|
||||
|
||||
/** Indicates whether payloads are available */
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
return ! matchPayload.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NearSpansPayloadOrdered("+query.toString()+")@"+docID()+": "+startPosition()+" - "+endPosition();
|
||||
}
|
||||
}
|
||||
|
|
@ -17,17 +17,12 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
|
@ -37,233 +32,210 @@ import java.util.HashSet;
|
|||
* Expert:
|
||||
* Only public for subclassing. Most implementations should not need this class
|
||||
*/
|
||||
public class NearSpansUnordered extends Spans {
|
||||
private SpanNearQuery query;
|
||||
public class NearSpansUnordered extends NearSpans {
|
||||
|
||||
private List<SpansCell> ordered = new ArrayList<>(); // spans in query order
|
||||
private Spans[] subSpans;
|
||||
private int slop; // from query
|
||||
private List<SpansCell> subSpanCells; // in query order
|
||||
|
||||
private SpansCell first; // linked list of spans
|
||||
private SpansCell last; // sorted by doc only
|
||||
private SpanPositionQueue spanPositionQueue;
|
||||
|
||||
private int totalLength; // sum of current lengths
|
||||
public NearSpansUnordered(SpanNearQuery query, List<Spans> subSpans)
|
||||
throws IOException {
|
||||
super(query, subSpans);
|
||||
|
||||
private CellQueue queue; // sorted queue of spans
|
||||
private SpansCell max; // max element in queue
|
||||
this.subSpanCells = new ArrayList<>(subSpans.size());
|
||||
for (Spans subSpan : subSpans) { // sub spans in query order
|
||||
this.subSpanCells.add(new SpansCell(subSpan));
|
||||
}
|
||||
spanPositionQueue = new SpanPositionQueue(subSpans.size());
|
||||
singleCellToPositionQueue(); // -1 startPosition/endPosition also at doc -1
|
||||
}
|
||||
|
||||
private boolean more = true; // true iff not done
|
||||
private boolean firstTime = true; // true before first next()
|
||||
private void singleCellToPositionQueue() {
|
||||
maxEndPositionCell = subSpanCells.get(0);
|
||||
assert maxEndPositionCell.docID() == -1;
|
||||
assert maxEndPositionCell.startPosition() == -1;
|
||||
spanPositionQueue.add(maxEndPositionCell);
|
||||
}
|
||||
|
||||
private class CellQueue extends PriorityQueue<SpansCell> {
|
||||
public CellQueue(int size) {
|
||||
private void subSpanCellsToPositionQueue() throws IOException { // used when all subSpanCells arrived at the same doc.
|
||||
spanPositionQueue.clear();
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
assert cell.startPosition() == -1;
|
||||
cell.nextStartPosition();
|
||||
assert cell.startPosition() != NO_MORE_POSITIONS;
|
||||
spanPositionQueue.add(cell);
|
||||
}
|
||||
}
|
||||
|
||||
/** SpansCell wraps a sub Spans to maintain totalSpanLength and maxEndPositionCell */
|
||||
private int totalSpanLength;
|
||||
private SpansCell maxEndPositionCell;
|
||||
|
||||
private class SpansCell extends FilterSpans {
|
||||
private int spanLength = -1;
|
||||
|
||||
public SpansCell(Spans spans) {
|
||||
super(spans);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
int res = in.nextStartPosition();
|
||||
if (res != NO_MORE_POSITIONS) {
|
||||
adjustLength();
|
||||
}
|
||||
adjustMax(); // also after last end position in current doc.
|
||||
return res;
|
||||
}
|
||||
|
||||
private void adjustLength() {
|
||||
if (spanLength != -1) {
|
||||
totalSpanLength -= spanLength; // subtract old, possibly from a previous doc
|
||||
}
|
||||
assert in.startPosition() != NO_MORE_POSITIONS;
|
||||
spanLength = endPosition() - startPosition();
|
||||
assert spanLength >= 0;
|
||||
totalSpanLength += spanLength; // add new
|
||||
}
|
||||
|
||||
private void adjustMax() {
|
||||
assert docID() == maxEndPositionCell.docID();
|
||||
if (endPosition() > maxEndPositionCell.endPosition()) {
|
||||
maxEndPositionCell = this;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NearSpansUnordered.SpansCell(" + in.toString() + ")";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class SpanPositionQueue extends PriorityQueue<SpansCell> {
|
||||
public SpanPositionQueue(int size) {
|
||||
super(size);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final boolean lessThan(SpansCell spans1, SpansCell spans2) {
|
||||
if (spans1.doc() == spans2.doc()) {
|
||||
return docSpansOrdered(spans1, spans2);
|
||||
} else {
|
||||
return spans1.doc() < spans2.doc();
|
||||
}
|
||||
return positionsOrdered(spans1, spans2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Wraps a Spans, and can be used to form a linked list. */
|
||||
private class SpansCell extends Spans {
|
||||
private Spans spans;
|
||||
private SpansCell next;
|
||||
private int length = -1;
|
||||
private int index;
|
||||
|
||||
public SpansCell(Spans spans, int index) {
|
||||
this.spans = spans;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
return adjust(spans.next());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
return adjust(spans.skipTo(target));
|
||||
}
|
||||
|
||||
private boolean adjust(boolean condition) {
|
||||
if (length != -1) {
|
||||
totalLength -= length; // subtract old length
|
||||
}
|
||||
if (condition) {
|
||||
length = end() - start();
|
||||
totalLength += length; // add new length
|
||||
|
||||
if (max == null || doc() > max.doc()
|
||||
|| (doc() == max.doc()) && (end() > max.end())) {
|
||||
max = this;
|
||||
}
|
||||
}
|
||||
more = condition;
|
||||
return condition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return spans.doc(); }
|
||||
|
||||
@Override
|
||||
public int start() { return spans.start(); }
|
||||
|
||||
@Override
|
||||
public int end() { return spans.end(); }
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
return new ArrayList<>(spans.getPayload());
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return spans.isPayloadAvailable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return spans.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() { return spans.toString() + "#" + index; }
|
||||
}
|
||||
|
||||
|
||||
public NearSpansUnordered(SpanNearQuery query, LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts)
|
||||
throws IOException {
|
||||
this.query = query;
|
||||
this.slop = query.getSlop();
|
||||
|
||||
SpanQuery[] clauses = query.getClauses();
|
||||
queue = new CellQueue(clauses.length);
|
||||
subSpans = new Spans[clauses.length];
|
||||
for (int i = 0; i < clauses.length; i++) {
|
||||
SpansCell cell =
|
||||
new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i);
|
||||
ordered.add(cell);
|
||||
subSpans[i] = cell.spans;
|
||||
}
|
||||
}
|
||||
public Spans[] getSubSpans() {
|
||||
return subSpans;
|
||||
}
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (firstTime) {
|
||||
initList(true);
|
||||
listToQueue(); // initialize queue
|
||||
firstTime = false;
|
||||
} else if (more) {
|
||||
if (min().next()) { // trigger further scanning
|
||||
queue.updateTop(); // maintain queue
|
||||
} else {
|
||||
more = false;
|
||||
}
|
||||
}
|
||||
|
||||
while (more) {
|
||||
|
||||
boolean queueStale = false;
|
||||
|
||||
if (min().doc() != max.doc()) { // maintain list
|
||||
queueToList();
|
||||
queueStale = true;
|
||||
}
|
||||
|
||||
// skip to doc w/ all clauses
|
||||
|
||||
while (more && first.doc() < last.doc()) {
|
||||
more = first.skipTo(last.doc()); // skip first upto last
|
||||
firstToLast(); // and move it to the end
|
||||
queueStale = true;
|
||||
}
|
||||
|
||||
if (!more) return false;
|
||||
|
||||
// found doc w/ all clauses
|
||||
|
||||
if (queueStale) { // maintain the queue
|
||||
listToQueue();
|
||||
queueStale = false;
|
||||
}
|
||||
|
||||
if (atMatch()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
more = min().next();
|
||||
if (more) {
|
||||
queue.updateTop(); // maintain queue
|
||||
}
|
||||
}
|
||||
return false; // no more matches
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (firstTime) { // initialize
|
||||
initList(false);
|
||||
for (SpansCell cell = first; more && cell!=null; cell=cell.next) {
|
||||
more = cell.skipTo(target); // skip all
|
||||
}
|
||||
if (more) {
|
||||
listToQueue();
|
||||
}
|
||||
firstTime = false;
|
||||
} else { // normal case
|
||||
while (more && min().doc() < target) { // skip as needed
|
||||
if (min().skipTo(target)) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
more = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return more && (atMatch() || next());
|
||||
}
|
||||
|
||||
/** Check whether two Spans in the same document are ordered with possible overlap.
|
||||
* @return true iff spans1 starts before spans2
|
||||
* or the spans start at the same position,
|
||||
* and spans1 ends before spans2.
|
||||
*/
|
||||
static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
|
||||
assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
|
||||
int start1 = spans1.start();
|
||||
int start2 = spans2.start();
|
||||
return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
|
||||
static final boolean positionsOrdered(Spans spans1, Spans spans2) {
|
||||
assert spans1.docID() == spans2.docID() : "doc1 " + spans1.docID() + " != doc2 " + spans2.docID();
|
||||
int start1 = spans1.startPosition();
|
||||
int start2 = spans2.startPosition();
|
||||
return (start1 == start2) ? (spans1.endPosition() < spans2.endPosition()) : (start1 < start2);
|
||||
}
|
||||
|
||||
private SpansCell min() { return queue.top(); }
|
||||
/** Returns the cell currently at the top of spanPositionQueue. */
private SpansCell minPositionCell() {
|
||||
return spanPositionQueue.top();
|
||||
}
|
||||
|
||||
private boolean atMatch() {
|
||||
assert minPositionCell().docID() == maxEndPositionCell.docID();
|
||||
return (maxEndPositionCell.endPosition() - minPositionCell().startPosition() - totalSpanLength) <= allowedSlop;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return min().doc(); }
|
||||
@Override
|
||||
public int start() { return min().start(); }
|
||||
@Override
|
||||
public int end() { return max.end(); }
|
||||
int toMatchDoc() throws IOException {
|
||||
// at doc with all subSpans
|
||||
subSpanCellsToPositionQueue();
|
||||
while (true) {
|
||||
if (atMatch()) {
|
||||
atFirstInCurrentDoc = true;
|
||||
oneExhaustedInCurrentDoc = false;
|
||||
return conjunction.docID();
|
||||
}
|
||||
assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
|
||||
if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) {
|
||||
spanPositionQueue.updateTop();
|
||||
}
|
||||
else { // exhausted a subSpan in current doc
|
||||
if (conjunction.nextDoc() == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
// at doc with all subSpans
|
||||
subSpanCellsToPositionQueue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||
// at doc with all subSpans
|
||||
subSpanCellsToPositionQueue();
|
||||
while (true) {
|
||||
if (atMatch()) {
|
||||
atFirstInCurrentDoc = true;
|
||||
oneExhaustedInCurrentDoc = false;
|
||||
return true;
|
||||
}
|
||||
assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
|
||||
if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) {
|
||||
spanPositionQueue.updateTop();
|
||||
}
|
||||
else { // exhausted a subSpan in current doc
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
if (atFirstInCurrentDoc) {
|
||||
atFirstInCurrentDoc = false;
|
||||
return minPositionCell().startPosition();
|
||||
}
|
||||
while (minPositionCell().startPosition() == -1) { // initially at current doc
|
||||
minPositionCell().nextStartPosition();
|
||||
spanPositionQueue.updateTop();
|
||||
}
|
||||
assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
|
||||
while (true) {
|
||||
if (minPositionCell().nextStartPosition() == NO_MORE_POSITIONS) {
|
||||
oneExhaustedInCurrentDoc = true;
|
||||
return NO_MORE_POSITIONS;
|
||||
}
|
||||
spanPositionQueue.updateTop();
|
||||
if (atMatch()) {
|
||||
return minPositionCell().startPosition();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
assert minPositionCell() != null;
|
||||
return atFirstInCurrentDoc ? -1
|
||||
: oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS
|
||||
: minPositionCell().startPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return atFirstInCurrentDoc ? -1
|
||||
: oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS
|
||||
: maxEndPositionCell.endPosition();
|
||||
}
|
||||
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
/**
|
||||
* WARNING: The List is not necessarily in order of the the positions
|
||||
* WARNING: The List is not necessarily in order of the positions.
|
||||
* @return Collection of <code>byte[]</code> payloads
|
||||
* @throws IOException if there is a low-level I/O error
|
||||
*/
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
Set<byte[]> matchPayload = new HashSet<>();
|
||||
for (SpansCell cell = first; cell != null; cell = cell.next) {
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
if (cell.isPayloadAvailable()) {
|
||||
matchPayload.addAll(cell.getPayload());
|
||||
}
|
||||
|
@ -271,78 +243,23 @@ public class NearSpansUnordered extends Spans {
|
|||
return matchPayload;
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
SpansCell pointer = min();
|
||||
while (pointer != null) {
|
||||
if (pointer.isPayloadAvailable()) {
|
||||
for (SpansCell cell : subSpanCells) {
|
||||
if (cell.isPayloadAvailable()) {
|
||||
return true;
|
||||
}
|
||||
pointer = pointer.next;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
long minCost = Long.MAX_VALUE;
|
||||
for (int i = 0; i < subSpans.length; i++) {
|
||||
minCost = Math.min(minCost, subSpans[i].cost());
|
||||
}
|
||||
return minCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getName() + "("+query.toString()+")@"+
|
||||
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
|
||||
}
|
||||
|
||||
private void initList(boolean next) throws IOException {
|
||||
for (int i = 0; more && i < ordered.size(); i++) {
|
||||
SpansCell cell = ordered.get(i);
|
||||
if (next)
|
||||
more = cell.next(); // move to first entry
|
||||
if (more) {
|
||||
addToList(cell); // add to list
|
||||
}
|
||||
if (minPositionCell() != null) {
|
||||
return getClass().getName() + "("+query.toString()+")@"+
|
||||
(docID()+":"+startPosition()+"-"+endPosition());
|
||||
} else {
|
||||
return getClass().getName() + "("+query.toString()+")@ ?START?";
|
||||
}
|
||||
}
|
||||
|
||||
private void addToList(SpansCell cell) {
|
||||
if (last != null) { // add next to end of list
|
||||
last.next = cell;
|
||||
} else
|
||||
first = cell;
|
||||
last = cell;
|
||||
cell.next = null;
|
||||
}
|
||||
|
||||
private void firstToLast() {
|
||||
last.next = first; // move first to end of list
|
||||
last = first;
|
||||
first = first.next;
|
||||
last.next = null;
|
||||
}
|
||||
|
||||
private void queueToList() {
|
||||
last = first = null;
|
||||
while (queue.top() != null) {
|
||||
addToList(queue.pop());
|
||||
}
|
||||
}
|
||||
|
||||
private void listToQueue() {
|
||||
queue.clear(); // rebuild queue
|
||||
for (SpansCell cell = first; cell != null; cell = cell.next) {
|
||||
queue.add(cell); // add to queue from list
|
||||
}
|
||||
}
|
||||
|
||||
private boolean atMatch() {
|
||||
return (min().doc() == max.doc())
|
||||
&& ((max.end() - min().start() - totalLength) <= slop);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,10 +37,10 @@ public class SpanFirstQuery extends SpanPositionRangeQuery {
|
|||
|
||||
@Override
|
||||
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
|
||||
assert spans.start() != spans.end() : "start equals end: " + spans.start();
|
||||
if (spans.start() >= end)
|
||||
return AcceptStatus.NO_AND_ADVANCE;
|
||||
else if (spans.end() <= end)
|
||||
assert spans.startPosition() != spans.endPosition() : "start equals end: " + spans.startPosition();
|
||||
if (spans.startPosition() >= end)
|
||||
return AcceptStatus.NO_MORE_IN_CURRENT_DOC;
|
||||
else if (spans.endPosition() <= end)
|
||||
return AcceptStatus.YES;
|
||||
else
|
||||
return AcceptStatus.NO;
|
||||
|
|
|
@ -105,7 +105,7 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = match.hashCode();
|
||||
int h = match.hashCode() ^ getClass().hashCode();
|
||||
h ^= (h << 8) | (h >>> 25); // reversible
|
||||
//TODO: is this right?
|
||||
h ^= payloadToMatch.hashCode();
|
||||
|
|
|
@ -37,7 +37,8 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
|
||||
/** Matches spans which are near one another. One can specify <i>slop</i>, the
|
||||
* maximum number of intervening unmatched positions, as well as whether
|
||||
* matches are required to be in-order. */
|
||||
* matches are required to be in-order.
|
||||
*/
|
||||
public class SpanNearQuery extends SpanQuery implements Cloneable {
|
||||
protected List<SpanQuery> clauses;
|
||||
protected int slop;
|
||||
|
@ -53,7 +54,7 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
* must be in the same order as in <code>clauses</code> and must be non-overlapping.
|
||||
* <br>When <code>inOrder</code> is false, the spans from each clause
|
||||
* need not be ordered and may overlap.
|
||||
* @param clauses the clauses to find near each other
|
||||
* @param clauses the clauses to find near each other, in the same field, at least 2.
|
||||
* @param slop The slop value
|
||||
* @param inOrder true if order is important
|
||||
*/
|
||||
|
@ -61,14 +62,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
this(clauses, slop, inOrder, true);
|
||||
}
|
||||
|
||||
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder, boolean collectPayloads) {
|
||||
|
||||
// copy clauses array into an ArrayList
|
||||
this.clauses = new ArrayList<>(clauses.length);
|
||||
for (int i = 0; i < clauses.length; i++) {
|
||||
SpanQuery clause = clauses[i];
|
||||
if (field == null) { // check field
|
||||
field = clause.getField();
|
||||
public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder, boolean collectPayloads) {
|
||||
this.clauses = new ArrayList<>(clausesIn.length);
|
||||
for (SpanQuery clause : clausesIn) {
|
||||
if (this.field == null) { // check field
|
||||
this.field = clause.getField();
|
||||
} else if (clause.getField() != null && !clause.getField().equals(field)) {
|
||||
throw new IllegalArgumentException("Clauses must have same field.");
|
||||
}
|
||||
|
@ -100,7 +98,6 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
|
@ -124,15 +121,21 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
if (clauses.size() == 0) // optimize 0-clause case
|
||||
return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
|
||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||
|
||||
if (clauses.size() == 1) // optimize 1-clause case
|
||||
return clauses.get(0).getSpans(context, acceptDocs, termContexts);
|
||||
for (SpanQuery seq : clauses) {
|
||||
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
|
||||
if (subSpan != null) {
|
||||
subSpans.add(subSpan);
|
||||
} else {
|
||||
return null; // all required
|
||||
}
|
||||
}
|
||||
|
||||
return inOrder
|
||||
? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
|
||||
: (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
|
||||
// all NearSpans require at least two subSpans
|
||||
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
|
||||
: collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
|
||||
: new NearSpansOrdered(this, subSpans);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -148,9 +151,9 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
}
|
||||
if (clone != null) {
|
||||
return clone; // some clauses rewrote
|
||||
return clone; // some clauses rewrote
|
||||
} else {
|
||||
return this; // no clauses rewrote
|
||||
return this; // no clauses rewrote
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,9 +30,11 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
/** Removes matches which overlap with another SpanQuery or
|
||||
* within a x tokens before or y tokens after another SpanQuery. */
|
||||
/** Removes matches which overlap with another SpanQuery or which are
|
||||
* within x tokens before or y tokens after another SpanQuery.
|
||||
*/
|
||||
public class SpanNotQuery extends SpanQuery implements Cloneable {
|
||||
private SpanQuery include;
|
||||
private SpanQuery exclude;
|
||||
|
@ -57,8 +59,8 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
* have no overlap with spans from <code>exclude</code> within
|
||||
* <code>pre</code> tokens before or <code>post</code> tokens after <code>include</code>. */
|
||||
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
|
||||
this.include = include;
|
||||
this.exclude = exclude;
|
||||
this.include = Objects.requireNonNull(include);
|
||||
this.exclude = Objects.requireNonNull(exclude);
|
||||
this.pre = (pre >=0) ? pre : 0;
|
||||
this.post = (post >= 0) ? post : 0;
|
||||
|
||||
|
@ -96,81 +98,153 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
public SpanNotQuery clone() {
|
||||
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
|
||||
(SpanQuery) exclude.clone(), pre, post);
|
||||
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery) include.clone(),
|
||||
(SpanQuery) exclude.clone(), pre, post);
|
||||
spanNotQuery.setBoost(getBoost());
|
||||
return spanNotQuery;
|
||||
return spanNotQuery;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||
Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
|
||||
if (includeSpans == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
|
||||
if (excludeSpans == null) {
|
||||
return includeSpans;
|
||||
}
|
||||
|
||||
return new Spans() {
|
||||
private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
|
||||
private boolean moreInclude = true;
|
||||
private boolean moreInclude = true;
|
||||
private int includeStart = -1;
|
||||
private int includeEnd = -1;
|
||||
private boolean atFirstInCurrentDoc = false;
|
||||
|
||||
private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
|
||||
private boolean moreExclude = excludeSpans.next();
|
||||
private boolean moreExclude = excludeSpans.nextDoc() != NO_MORE_DOCS;
|
||||
private int excludeStart = moreExclude ? excludeSpans.nextStartPosition() : NO_MORE_POSITIONS;
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (moreInclude) // move to next include
|
||||
moreInclude = includeSpans.next();
|
||||
|
||||
while (moreInclude && moreExclude) {
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (moreInclude) {
|
||||
moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS;
|
||||
if (moreInclude) {
|
||||
atFirstInCurrentDoc = true;
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
}
|
||||
}
|
||||
toNextIncluded();
|
||||
int res = moreInclude ? includeSpans.docID() : NO_MORE_DOCS;
|
||||
return res;
|
||||
}
|
||||
|
||||
if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
|
||||
moreExclude = excludeSpans.skipTo(includeSpans.doc());
|
||||
|
||||
while (moreExclude // while exclude is before
|
||||
&& includeSpans.doc() == excludeSpans.doc()
|
||||
&& excludeSpans.end() <= includeSpans.start() - pre) {
|
||||
moreExclude = excludeSpans.next(); // increment exclude
|
||||
private void toNextIncluded() throws IOException {
|
||||
while (moreInclude && moreExclude) {
|
||||
if (includeSpans.docID() > excludeSpans.docID()) {
|
||||
moreExclude = excludeSpans.advance(includeSpans.docID()) != NO_MORE_DOCS;
|
||||
if (moreExclude) {
|
||||
excludeStart = -1; // only use exclude positions at same doc
|
||||
}
|
||||
|
||||
if (!moreExclude // if no intersection
|
||||
|| includeSpans.doc() != excludeSpans.doc()
|
||||
|| includeSpans.end()+post <= excludeSpans.start())
|
||||
break; // we found a match
|
||||
|
||||
moreInclude = includeSpans.next(); // intersected: keep scanning
|
||||
}
|
||||
return moreInclude;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (moreInclude) // skip include
|
||||
moreInclude = includeSpans.skipTo(target);
|
||||
|
||||
if (!moreInclude)
|
||||
return false;
|
||||
|
||||
if (moreExclude // skip exclude
|
||||
&& includeSpans.doc() > excludeSpans.doc())
|
||||
moreExclude = excludeSpans.skipTo(includeSpans.doc());
|
||||
|
||||
while (moreExclude // while exclude is before
|
||||
&& includeSpans.doc() == excludeSpans.doc()
|
||||
&& excludeSpans.end() <= includeSpans.start()-pre) {
|
||||
moreExclude = excludeSpans.next(); // increment exclude
|
||||
if (excludeForwardInCurrentDocAndAtMatch()) {
|
||||
break; // at match.
|
||||
}
|
||||
|
||||
if (!moreExclude // if no intersection
|
||||
|| includeSpans.doc() != excludeSpans.doc()
|
||||
|| includeSpans.end()+post <= excludeSpans.start())
|
||||
return true; // we found a match
|
||||
// else intersected: keep scanning, to next doc if needed
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
if (includeStart == NO_MORE_POSITIONS) {
|
||||
moreInclude = includeSpans.nextDoc() != NO_MORE_DOCS;
|
||||
if (moreInclude) {
|
||||
atFirstInCurrentDoc = true;
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return next(); // scan to next match
|
||||
private boolean excludeForwardInCurrentDocAndAtMatch() throws IOException {
|
||||
assert moreInclude;
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
if (! moreExclude) {
|
||||
return true;
|
||||
}
|
||||
if (includeSpans.docID() != excludeSpans.docID()) {
|
||||
return true;
|
||||
}
|
||||
// at same doc
|
||||
if (excludeStart == -1) { // init exclude start position if needed
|
||||
excludeStart = excludeSpans.nextStartPosition();
|
||||
assert excludeStart != NO_MORE_POSITIONS;
|
||||
}
|
||||
while (excludeSpans.endPosition() <= includeStart - pre) {
|
||||
// exclude end position is before a possible exclusion
|
||||
excludeStart = excludeSpans.nextStartPosition();
|
||||
if (excludeStart == NO_MORE_POSITIONS) {
|
||||
return true; // no more exclude at current doc.
|
||||
}
|
||||
}
|
||||
// exclude end position far enough in current doc, check start position:
|
||||
boolean res = includeSpans.endPosition() + post <= excludeStart;
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (moreInclude) {
|
||||
assert target > includeSpans.docID() : "target="+target+", includeSpans.docID()="+includeSpans.docID();
|
||||
moreInclude = includeSpans.advance(target) != NO_MORE_DOCS;
|
||||
if (moreInclude) {
|
||||
atFirstInCurrentDoc = true;
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
}
|
||||
}
|
||||
toNextIncluded();
|
||||
int res = moreInclude ? includeSpans.docID() : NO_MORE_DOCS;
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
int res = includeSpans.docID();
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
assert moreInclude;
|
||||
|
||||
if (atFirstInCurrentDoc) {
|
||||
atFirstInCurrentDoc = false;
|
||||
assert includeStart != NO_MORE_POSITIONS;
|
||||
return includeStart;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return includeSpans.doc(); }
|
||||
@Override
|
||||
public int start() { return includeSpans.start(); }
|
||||
@Override
|
||||
public int end() { return includeSpans.end(); }
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
while ((includeStart != NO_MORE_POSITIONS)
|
||||
&& (! excludeForwardInCurrentDocAndAtMatch()))
|
||||
{
|
||||
includeStart = includeSpans.nextStartPosition();
|
||||
}
|
||||
|
||||
return includeStart;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
assert includeStart == includeSpans.startPosition();
|
||||
return atFirstInCurrentDoc ? -1 : includeStart;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return atFirstInCurrentDoc ? -1 : includeSpans.endPosition();
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
ArrayList<byte[]> result = null;
|
||||
|
@ -180,7 +254,6 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
return result;
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return includeSpans.isPayloadAvailable();
|
||||
|
@ -193,10 +266,9 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "spans(" + SpanNotQuery.this.toString() + ")";
|
||||
}
|
||||
|
||||
};
|
||||
return "spans(" + SpanNotQuery.this.toString() + ")";
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -35,18 +35,19 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/** Matches the union of its clauses.*/
|
||||
/** Matches the union of its clauses.
|
||||
*/
|
||||
public class SpanOrQuery extends SpanQuery implements Cloneable {
|
||||
private List<SpanQuery> clauses;
|
||||
private String field;
|
||||
|
||||
/** Construct a SpanOrQuery merging the provided clauses. */
|
||||
/** Construct a SpanOrQuery merging the provided clauses.
|
||||
* All clauses must have the same field.
|
||||
*/
|
||||
public SpanOrQuery(SpanQuery... clauses) {
|
||||
|
||||
// copy clauses array into an ArrayList
|
||||
this.clauses = new ArrayList<>(clauses.length);
|
||||
for (int i = 0; i < clauses.length; i++) {
|
||||
addClause(clauses[i]);
|
||||
for (SpanQuery seq : clauses) {
|
||||
addClause(seq);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -152,90 +153,120 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
protected final boolean lessThan(Spans spans1, Spans spans2) {
|
||||
if (spans1.doc() == spans2.doc()) {
|
||||
if (spans1.start() == spans2.start()) {
|
||||
return spans1.end() < spans2.end();
|
||||
if (spans1.docID() == spans2.docID()) {
|
||||
if (spans1.startPosition() == spans2.startPosition()) {
|
||||
return spans1.endPosition() < spans2.endPosition();
|
||||
} else {
|
||||
return spans1.start() < spans2.start();
|
||||
return spans1.startPosition() < spans2.startPosition();
|
||||
}
|
||||
} else {
|
||||
return spans1.doc() < spans2.doc();
|
||||
return spans1.docID() < spans2.docID();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||
if (clauses.size() == 1) // optimize 1-clause case
|
||||
return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
|
||||
throws IOException {
|
||||
|
||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||
|
||||
for (SpanQuery seq : clauses) {
|
||||
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
|
||||
if (subSpan != null) {
|
||||
subSpans.add(subSpan);
|
||||
}
|
||||
}
|
||||
|
||||
if (subSpans.size() == 0) {
|
||||
return null;
|
||||
} else if (subSpans.size() == 1) {
|
||||
return subSpans.get(0);
|
||||
}
|
||||
|
||||
SpanQueue queue = new SpanQueue(clauses.size());
|
||||
for (Spans spans : subSpans) {
|
||||
queue.add(spans);
|
||||
}
|
||||
|
||||
return new Spans() {
|
||||
private SpanQueue queue = null;
|
||||
private long cost;
|
||||
|
||||
private boolean initSpanQueue(int target) throws IOException {
|
||||
queue = new SpanQueue(clauses.size());
|
||||
Iterator<SpanQuery> i = clauses.iterator();
|
||||
while (i.hasNext()) {
|
||||
Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
|
||||
cost += spans.cost();
|
||||
if ( ((target == -1) && spans.next())
|
||||
|| ((target != -1) && spans.skipTo(target))) {
|
||||
queue.add(spans);
|
||||
}
|
||||
}
|
||||
return queue.size() != 0;
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (queue.size() == 0) { // all done
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (queue == null) {
|
||||
return initSpanQueue(-1);
|
||||
}
|
||||
int currentDoc = top().docID();
|
||||
|
||||
if (queue.size() == 0) { // all done
|
||||
return false;
|
||||
}
|
||||
if (currentDoc == -1) { // initially
|
||||
return advance(0);
|
||||
}
|
||||
|
||||
if (top().next()) { // move to next
|
||||
do {
|
||||
if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
|
||||
queue.updateTop();
|
||||
return true;
|
||||
}
|
||||
|
||||
queue.pop(); // exhausted a clause
|
||||
return queue.size() != 0;
|
||||
}
|
||||
|
||||
private Spans top() { return queue.top(); }
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (queue == null) {
|
||||
return initSpanQueue(target);
|
||||
}
|
||||
|
||||
boolean skipCalled = false;
|
||||
while (queue.size() != 0 && top().doc() < target) {
|
||||
if (top().skipTo(target)) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
queue.pop();
|
||||
} else {
|
||||
queue.pop(); // exhausted a clause
|
||||
if (queue.size() == 0) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
skipCalled = true;
|
||||
}
|
||||
// assert queue.size() > 0;
|
||||
int doc = top().docID();
|
||||
if (doc > currentDoc) {
|
||||
return doc;
|
||||
}
|
||||
} while (true);
|
||||
}
|
||||
|
||||
if (skipCalled) {
|
||||
return queue.size() != 0;
|
||||
/** Returns the sub-spans currently at the top of the queue. */
private Spans top() {
|
||||
return queue.top();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
|
||||
while ((queue.size() > 0) && (top().docID() < target)) {
|
||||
if (top().advance(target) != NO_MORE_DOCS) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
queue.pop();
|
||||
}
|
||||
return next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return top().doc(); }
|
||||
@Override
|
||||
public int start() { return top().start(); }
|
||||
@Override
|
||||
public int end() { return top().end(); }
|
||||
return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return (queue == null) ? -1
|
||||
: (queue.size() > 0) ? top().docID()
|
||||
: NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
top().nextStartPosition();
|
||||
queue.updateTop();
|
||||
int startPos = top().startPosition();
|
||||
while (startPos == -1) { // initially at this doc
|
||||
top().nextStartPosition();
|
||||
queue.updateTop();
|
||||
startPos = top().startPosition();
|
||||
}
|
||||
return startPos;
|
||||
}
|
||||
|
||||
/** Returns the start position reported by the sub-spans at the top of the queue. */
@Override
|
||||
public int startPosition() {
|
||||
return top().startPosition();
|
||||
}
|
||||
|
||||
/** Returns the end position reported by the sub-spans at the top of the queue. */
@Override
|
||||
public int endPosition() {
|
||||
return top().endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
|
@ -257,15 +288,23 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
public String toString() {
|
||||
return "spans("+SpanOrQuery.this+")@"+
|
||||
((queue == null)?"START"
|
||||
:(queue.size()>0?(doc()+":"+start()+"-"+end()):"END"));
|
||||
}
|
||||
:(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
|
||||
}
|
||||
|
||||
private long cost = -1;
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
if (cost == -1) {
|
||||
cost = 0;
|
||||
for (Spans spans : subSpans) {
|
||||
cost += spans.cost();
|
||||
}
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,15 +28,14 @@ import java.util.Iterator;
|
|||
* Only return those matches that have a specific payload at
|
||||
* the given position.
|
||||
* <p>
|
||||
* Do not use this with an SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}. Instead, use
|
||||
* {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
|
||||
* Do not use this with a SpanQuery that contains a {@link org.apache.lucene.search.spans.SpanNearQuery}.
|
||||
* Instead, use {@link SpanNearPayloadCheckQuery} since it properly handles the fact that payloads
|
||||
* aren't ordered by {@link org.apache.lucene.search.spans.SpanNearQuery}.
|
||||
*/
|
||||
public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
|
||||
public class SpanPayloadCheckQuery extends SpanPositionCheckQuery {
|
||||
protected final Collection<byte[]> payloadToMatch;
|
||||
|
||||
/**
|
||||
*
|
||||
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
|
||||
* @param payloadToMatch The {@link java.util.Collection} of payloads to match
|
||||
*/
|
||||
|
@ -108,7 +107,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = match.hashCode();
|
||||
int h = match.hashCode() ^ getClass().hashCode();
|
||||
h ^= (h << 8) | (h >>> 25); // reversible
|
||||
//TODO: is this right?
|
||||
h ^= payloadToMatch.hashCode();
|
||||
|
|
|
@ -25,10 +25,9 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -37,9 +36,8 @@ import java.util.Set;
|
|||
public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
|
||||
protected SpanQuery match;
|
||||
|
||||
|
||||
public SpanPositionCheckQuery(SpanQuery match) {
|
||||
this.match = match;
|
||||
this.match = Objects.requireNonNull(match);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -71,31 +69,33 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
NO,
|
||||
|
||||
/**
|
||||
* Indicates the match should be rejected, and the enumeration should advance
|
||||
* to the next document.
|
||||
* Indicates the match should be rejected, and the enumeration may continue
|
||||
* with the next document.
|
||||
*/
|
||||
NO_AND_ADVANCE
|
||||
NO_MORE_IN_CURRENT_DOC
|
||||
};
|
||||
|
||||
/**
|
||||
* Implementing classes are required to return whether the current position is a match for the passed in
|
||||
* "match" {@link org.apache.lucene.search.spans.SpanQuery}.
|
||||
* "match" {@link SpanQuery}.
|
||||
*
|
||||
* This is only called if the underlying {@link org.apache.lucene.search.spans.Spans#next()} for the
|
||||
* match is successful
|
||||
* This is only called if the underlying last {@link Spans#nextStartPosition()} for the
|
||||
* match indicated a valid start position.
|
||||
*
|
||||
*
|
||||
* @param spans The {@link org.apache.lucene.search.spans.Spans} instance, positioned at the spot to check
|
||||
* @param spans The {@link Spans} instance, positioned at the spot to check
|
||||
*
|
||||
* @return whether the match is accepted, rejected, or rejected and should move to the next doc.
|
||||
*
|
||||
* @see org.apache.lucene.search.spans.Spans#next()
|
||||
* @see Spans#nextDoc()
|
||||
*
|
||||
*/
|
||||
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
return new PositionCheckSpan(context, acceptDocs, termContexts);
|
||||
Spans matchSpans = match.getSpans(context, acceptDocs, termContexts);
|
||||
return (matchSpans == null) ? null : new PositionCheckSpans(matchSpans);
|
||||
}
|
||||
|
||||
|
||||
|
@ -116,79 +116,110 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
}
|
||||
}
|
||||
|
||||
protected class PositionCheckSpan extends Spans {
|
||||
private Spans spans;
|
||||
protected class PositionCheckSpans extends FilterSpans {
|
||||
|
||||
public PositionCheckSpan(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||
spans = match.getSpans(context, acceptDocs, termContexts);
|
||||
private boolean atFirstInCurrentDoc = false;
|
||||
private int startPos = -1;
|
||||
|
||||
public PositionCheckSpans(Spans matchSpans) throws IOException {
|
||||
super(matchSpans);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (!spans.next())
|
||||
return false;
|
||||
public int nextDoc() throws IOException {
|
||||
if (in.nextDoc() == NO_MORE_DOCS)
|
||||
return NO_MORE_DOCS;
|
||||
|
||||
return doNext();
|
||||
return toNextDocWithAllowedPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (!spans.skipTo(target))
|
||||
return false;
|
||||
public int advance(int target) throws IOException {
|
||||
if (in.advance(target) == NO_MORE_DOCS)
|
||||
return NO_MORE_DOCS;
|
||||
|
||||
return doNext();
|
||||
return toNextDocWithAllowedPosition();
|
||||
}
|
||||
|
||||
protected boolean doNext() throws IOException {
|
||||
@SuppressWarnings("fallthrough")
|
||||
protected int toNextDocWithAllowedPosition() throws IOException {
|
||||
startPos = in.nextStartPosition();
|
||||
assert startPos != NO_MORE_POSITIONS;
|
||||
for (;;) {
|
||||
switch(acceptPosition(this)) {
|
||||
case YES: return true;
|
||||
case YES:
|
||||
atFirstInCurrentDoc = true;
|
||||
return in.docID();
|
||||
case NO:
|
||||
if (!spans.next())
|
||||
return false;
|
||||
break;
|
||||
case NO_AND_ADVANCE:
|
||||
if (!spans.skipTo(spans.doc()+1))
|
||||
return false;
|
||||
startPos = in.nextStartPosition();
|
||||
if (startPos != NO_MORE_POSITIONS) {
|
||||
break;
|
||||
}
|
||||
// else fallthrough
|
||||
case NO_MORE_IN_CURRENT_DOC:
|
||||
if (in.nextDoc() == NO_MORE_DOCS) {
|
||||
startPos = -1;
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
startPos = in.nextStartPosition();
|
||||
assert startPos != NO_MORE_POSITIONS : "no start position at doc="+in.docID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() { return spans.doc(); }
|
||||
|
||||
@Override
|
||||
public int start() { return spans.start(); }
|
||||
|
||||
@Override
|
||||
public int end() { return spans.end(); }
|
||||
// TODO: Remove warning after API has been finalized
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
ArrayList<byte[]> result = null;
|
||||
if (spans.isPayloadAvailable()) {
|
||||
result = new ArrayList<>(spans.getPayload());
|
||||
public int nextStartPosition() throws IOException {
|
||||
if (atFirstInCurrentDoc) {
|
||||
atFirstInCurrentDoc = false;
|
||||
return startPos;
|
||||
}
|
||||
return result;//TODO: any way to avoid the new construction?
|
||||
}
|
||||
// TODO: Remove warning after API has been finalized
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return spans.isPayloadAvailable();
|
||||
for (;;) {
|
||||
startPos = in.nextStartPosition();
|
||||
if (startPos == NO_MORE_POSITIONS) {
|
||||
return NO_MORE_POSITIONS;
|
||||
}
|
||||
switch(acceptPosition(this)) {
|
||||
case YES:
|
||||
return startPos;
|
||||
case NO:
|
||||
break;
|
||||
case NO_MORE_IN_CURRENT_DOC:
|
||||
return startPos = NO_MORE_POSITIONS; // startPos ahead for the current doc.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return spans.cost();
|
||||
public int startPosition() {
|
||||
return atFirstInCurrentDoc ? -1 : startPos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return atFirstInCurrentDoc ? -1
|
||||
: (startPos != NO_MORE_POSITIONS) ? in.endPosition() : NO_MORE_POSITIONS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "spans(" + SpanPositionCheckQuery.this.toString() + ")";
|
||||
}
|
||||
return "spans(" + SpanPositionCheckQuery.this.toString() + ")";
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true iff <code>o</code> is equal to this. */
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null) return false;
|
||||
if (getClass() != o.getClass()) return false;
|
||||
final SpanPositionCheckQuery spcq = (SpanPositionCheckQuery) o;
|
||||
return match.equals(spcq.match);
|
||||
}
|
||||
|
||||
/**
 * Combines the hash of the wrapped <code>match</code> query with the hash of
 * the runtime class, so that different subclasses wrapping the same query
 * hash differently.
 */
@Override
|
||||
public int hashCode() {
|
||||
return match.hashCode() ^ getClass().hashCode();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,10 +25,10 @@ import java.io.IOException;
|
|||
/**
|
||||
* Checks to see if the {@link #getMatch()} lies between a start and end position
|
||||
*
|
||||
* @see org.apache.lucene.search.spans.SpanFirstQuery for a derivation that is optimized for the case where start position is 0
|
||||
* See {@link SpanFirstQuery} for a derivation that is optimized for the case where start position is 0.
|
||||
*/
|
||||
public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
|
||||
protected int start = 0;
|
||||
protected int start;
|
||||
protected int end;
|
||||
|
||||
public SpanPositionRangeQuery(SpanQuery match, int start, int end) {
|
||||
|
@ -40,13 +40,12 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
|
|||
|
||||
@Override
|
||||
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
|
||||
assert spans.start() != spans.end();
|
||||
if (spans.start() >= end)
|
||||
return AcceptStatus.NO_AND_ADVANCE;
|
||||
else if (spans.start() >= start && spans.end() <= end)
|
||||
return AcceptStatus.YES;
|
||||
else
|
||||
return AcceptStatus.NO;
|
||||
assert spans.startPosition() != spans.endPosition();
|
||||
AcceptStatus res = (spans.startPosition() >= end)
|
||||
? AcceptStatus.NO_MORE_IN_CURRENT_DOC
|
||||
: (spans.startPosition() >= start && spans.endPosition() <= end)
|
||||
? AcceptStatus.YES : AcceptStatus.NO;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
@ -96,7 +95,7 @@ public class SpanPositionRangeQuery extends SpanPositionCheckQuery {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = match.hashCode();
|
||||
int h = match.hashCode() ^ getClass().hashCode();
|
||||
h ^= (h << 8) | (h >>> 25); // reversible
|
||||
h ^= Float.floatToRawIntBits(getBoost()) ^ end ^ start;
|
||||
return h;
|
||||
|
|
|
@ -25,13 +25,14 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/** Base class for span-based queries. */
|
||||
public abstract class SpanQuery extends Query {
|
||||
/** Expert: Returns the matches for this query in an index. Used internally
|
||||
* to search for spans. */
|
||||
/** Expert: Returns the matches for this query in an index.
|
||||
* Used internally to search for spans.
|
||||
* This may return null to indicate that the SpanQuery has no results.
|
||||
*/
|
||||
public abstract Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
|
||||
|
||||
/**
|
||||
|
@ -42,7 +43,7 @@ public abstract class SpanQuery extends Query {
|
|||
public abstract String getField();
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new SpanWeight(this, searcher);
|
||||
}
|
||||
|
||||
|
|
|
@ -18,9 +18,9 @@ package org.apache.lucene.search.spans;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
||||
/**
|
||||
|
@ -29,58 +29,68 @@ import org.apache.lucene.search.similarities.Similarity;
|
|||
public class SpanScorer extends Scorer {
|
||||
protected Spans spans;
|
||||
|
||||
protected boolean more = true;
|
||||
|
||||
protected int doc;
|
||||
protected float freq;
|
||||
protected int numMatches;
|
||||
protected final Similarity.SimScorer docScorer;
|
||||
|
||||
protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
|
||||
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
|
||||
throws IOException {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
this.spans = spans;
|
||||
|
||||
doc = -1;
|
||||
more = spans.next();
|
||||
this.docScorer = Objects.requireNonNull(docScorer);
|
||||
this.spans = Objects.requireNonNull(spans);
|
||||
this.doc = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (!setFreqCurrentDoc()) {
|
||||
doc = NO_MORE_DOCS;
|
||||
int prevDoc = doc;
|
||||
doc = spans.nextDoc();
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
setFreqCurrentDoc();
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (!more) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
if (spans.doc() < target) { // setFreqCurrentDoc() leaves spans.doc() ahead
|
||||
more = spans.skipTo(target);
|
||||
}
|
||||
if (!setFreqCurrentDoc()) {
|
||||
doc = NO_MORE_DOCS;
|
||||
int prevDoc = doc;
|
||||
doc = spans.advance(target);
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
setFreqCurrentDoc();
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
protected boolean setFreqCurrentDoc() throws IOException {
|
||||
if (!more) {
|
||||
return false;
|
||||
}
|
||||
doc = spans.doc();
|
||||
freq = 0.0f;
|
||||
numMatches = 0;
|
||||
|
||||
assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans;
|
||||
assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans;
|
||||
int prevStartPos = -1;
|
||||
int prevEndPos = -1;
|
||||
|
||||
int startPos = spans.nextStartPosition();
|
||||
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
|
||||
do {
|
||||
int matchLength = spans.end() - spans.start();
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
assert startPos >= prevStartPos;
|
||||
int endPos = spans.endPosition();
|
||||
assert endPos != Spans.NO_MORE_POSITIONS;
|
||||
// This assertion can fail for Or spans on the same term:
|
||||
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos;
|
||||
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos;
|
||||
numMatches++;
|
||||
more = spans.next();
|
||||
} while (more && (doc == spans.doc()));
|
||||
int matchLength = endPos - startPos;
|
||||
freq += docScorer.computeSlopFactor(matchLength);
|
||||
prevStartPos = startPos;
|
||||
prevEndPos = endPos;
|
||||
startPos = spans.nextStartPosition();
|
||||
} while (startPos != Spans.NO_MORE_POSITIONS);
|
||||
|
||||
assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
|
||||
assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -89,7 +99,8 @@ public class SpanScorer extends Scorer {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return docScorer.score(doc, freq);
|
||||
float s = docScorer.score(doc, freq);
|
||||
return s;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -31,12 +32,16 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/** Matches spans containing a term. */
|
||||
/** Matches spans containing a term.
|
||||
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
|
||||
*/
|
||||
public class SpanTermQuery extends SpanQuery {
|
||||
protected Term term;
|
||||
|
||||
/** Construct a SpanTermQuery matching the named term's spans. */
|
||||
public SpanTermQuery(Term term) { this.term = term; }
|
||||
public SpanTermQuery(Term term) {
|
||||
this.term = Objects.requireNonNull(term);
|
||||
}
|
||||
|
||||
/** Return the term whose spans are matched. */
|
||||
public Term getTerm() { return term; }
|
||||
|
@ -64,7 +69,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = super.hashCode();
|
||||
result = prime * result + ((term == null) ? 0 : term.hashCode());
|
||||
result = prime * result + term.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -77,12 +82,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
SpanTermQuery other = (SpanTermQuery) obj;
|
||||
if (term == null) {
|
||||
if (other.term != null)
|
||||
return false;
|
||||
} else if (!term.equals(other.term))
|
||||
return false;
|
||||
return true;
|
||||
return term.equals(other.term);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -112,7 +112,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
}
|
||||
|
||||
if (state == null) { // term is not present in that reader
|
||||
return TermSpans.EMPTY_TERM_SPANS;
|
||||
return null;
|
||||
}
|
||||
|
||||
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
|
||||
|
|
|
@ -88,9 +88,9 @@ public class SpanWeight extends Weight {
|
|||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
if (stats == null) {
|
||||
return null;
|
||||
} else {
|
||||
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context));
|
||||
}
|
||||
Spans spans = query.getSpans(context, acceptDocs, termContexts);
|
||||
return (spans == null) ? null : new SpanScorer(spans, this, similarity.simScorer(stats, context));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,54 +20,44 @@ package org.apache.lucene.search.spans;
|
|||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
/** Expert: an enumeration of span matches. Used to implement span searching.
|
||||
* Each span represents a range of term positions within a document. Matches
|
||||
* are enumerated in order, by increasing document number, within that by
|
||||
* increasing start position and finally by increasing end position. */
|
||||
public abstract class Spans {
|
||||
/** Move to the next match, returning true iff any such exists. */
|
||||
public abstract boolean next() throws IOException;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
|
||||
/** Skips to the first match beyond the current, whose document number is
|
||||
* greater than or equal to <i>target</i>.
|
||||
* <p>The behavior of this method is <b>undefined</b> when called with
|
||||
* <code> target ≤ current</code>, or after the iterator has exhausted.
|
||||
* Both cases may result in unpredicted behavior.
|
||||
* <p>Returns true iff there is such
|
||||
* a match. <p>Behaves as if written:
|
||||
* <pre class="prettyprint">
|
||||
* boolean skipTo(int target) {
|
||||
* do {
|
||||
* if (!next())
|
||||
* return false;
|
||||
* } while (target > doc());
|
||||
* return true;
|
||||
* }
|
||||
* </pre>
|
||||
* Most implementations are considerably more efficient than that.
|
||||
*/
|
||||
public abstract boolean skipTo(int target) throws IOException;
|
||||
|
||||
/** Returns the document number of the current match. Initially invalid. */
|
||||
public abstract int doc();
|
||||
|
||||
/** Returns the start position of the current match. Initially invalid. */
|
||||
public abstract int start();
|
||||
|
||||
/** Returns the end position of the current match. Initially invalid. */
|
||||
public abstract int end();
|
||||
/** Iterates through combinations of start/end positions per-doc.
|
||||
* Each start/end position represents a range of term positions within the current document.
|
||||
* These are enumerated in order, by increasing document number, within that by
|
||||
* increasing start position and finally by increasing end position.
|
||||
*/
|
||||
public abstract class Spans extends DocIdSetIterator {
|
||||
public static final int NO_MORE_POSITIONS = Integer.MAX_VALUE;
|
||||
|
||||
/**
|
||||
* Returns the payload data for the current span.
|
||||
* This is invalid until {@link #next()} is called for
|
||||
* the first time.
|
||||
* Returns the next start position for the current doc.
|
||||
* There is always at least one start/end position per doc.
|
||||
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
|
||||
*/
|
||||
public abstract int nextStartPosition() throws IOException;
|
||||
|
||||
/**
|
||||
* Returns the start position in the current doc, or -1 when {@link #nextStartPosition} was not yet called on the current doc.
|
||||
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
|
||||
*/
|
||||
public abstract int startPosition();
|
||||
|
||||
/**
|
||||
* Returns the end position for the current start position, or -1 when {@link #nextStartPosition} was not yet called on the current doc.
|
||||
* After the last start/end position at the current doc this returns {@link #NO_MORE_POSITIONS}.
|
||||
*/
|
||||
public abstract int endPosition();
|
||||
|
||||
/**
|
||||
* Returns the payload data for the current start/end position.
|
||||
* This is only valid after {@link #nextStartPosition()}
|
||||
* returned an available start position.
|
||||
* This method must not be called more than once after each call
|
||||
* of {@link #next()}. However, most payloads are loaded lazily,
|
||||
* of {@link #nextStartPosition()}. However, most payloads are loaded lazily,
|
||||
* so if the payload data for the current position is not needed,
|
||||
* this method may not be called at all for performance reasons. An ordered
|
||||
* SpanQuery does not lazy load, so if you have payloads in your index and
|
||||
* you do not want ordered SpanNearQuerys to collect payloads, you can
|
||||
* disable collection with a constructor option.<br>
|
||||
* this method may not be called at all for performance reasons.
|
||||
* <br>
|
||||
* Note that the return type is a collection, thus the ordering should not be relied upon.
|
||||
* <br>
|
||||
|
@ -76,25 +66,35 @@ public abstract class Spans {
|
|||
* @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
|
||||
* @throws IOException if there is a low-level I/O error
|
||||
*/
|
||||
// TODO: Remove warning after API has been finalized
|
||||
public abstract Collection<byte[]> getPayload() throws IOException;
|
||||
|
||||
/**
|
||||
* Checks if a payload can be loaded at this position.
|
||||
* Checks if a payload can be loaded at the current start/end position.
|
||||
* <p>
|
||||
* Payloads can only be loaded once per call to
|
||||
* {@link #next()}.
|
||||
* {@link #nextStartPosition()}.
|
||||
*
|
||||
* @return true if there is a payload available at this position that can be loaded
|
||||
* @return true if there is a payload available at this start/end position
|
||||
* that can be loaded
|
||||
*/
|
||||
public abstract boolean isPayloadAvailable() throws IOException;
|
||||
|
||||
/**
|
||||
* Returns the estimated cost of this spans.
|
||||
* <p>
|
||||
* This is generally an upper bound of the number of documents this iterator
|
||||
* might match, but may be a rough heuristic, hardcoded value, or otherwise
|
||||
* completely inaccurate.
|
||||
* Optional method: Return a {@link TwoPhaseIterator} view of this
|
||||
* {@link Spans}. A return value of {@code null} indicates that
|
||||
* two-phase iteration is not supported.
|
||||
*
|
||||
* Note that the returned {@link TwoPhaseIterator}'s
|
||||
* {@link TwoPhaseIterator#approximation() approximation} must
|
||||
* advance synchronously with this iterator: advancing the approximation must
|
||||
* advance this iterator and vice-versa.
|
||||
*
|
||||
* Implementing this method is typically useful on {@link Spans}s
|
||||
* that have a high per-document overhead in order to confirm matches.
|
||||
*
|
||||
* The default implementation returns {@code null}.
|
||||
*/
|
||||
public abstract long cost();
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,10 +24,12 @@ import org.apache.lucene.util.BytesRef;
|
|||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Collection;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Expert:
|
||||
* Public for extension only
|
||||
* Public for extension only.
|
||||
* This does not work correctly for terms that are indexed at position Integer.MAX_VALUE.
|
||||
*/
|
||||
public class TermSpans extends Spans {
|
||||
protected final PostingsEnum postings;
|
||||
|
@ -39,65 +41,67 @@ public class TermSpans extends Spans {
|
|||
protected boolean readPayload;
|
||||
|
||||
public TermSpans(PostingsEnum postings, Term term) {
|
||||
this.postings = postings;
|
||||
this.term = term;
|
||||
doc = -1;
|
||||
}
|
||||
|
||||
// only for EmptyTermSpans (below)
|
||||
TermSpans() {
|
||||
term = null;
|
||||
postings = null;
|
||||
this.postings = Objects.requireNonNull(postings);
|
||||
this.term = Objects.requireNonNull(term);
|
||||
this.doc = -1;
|
||||
this.position = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (count == freq) {
|
||||
if (postings == null) {
|
||||
return false;
|
||||
}
|
||||
doc = postings.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return false;
|
||||
}
|
||||
public int nextDoc() throws IOException {
|
||||
doc = postings.nextDoc();
|
||||
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
freq = postings.freq();
|
||||
assert freq >= 1;
|
||||
count = 0;
|
||||
}
|
||||
position = postings.nextPosition();
|
||||
count++;
|
||||
readPayload = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
assert target > doc;
|
||||
doc = postings.advance(target);
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
freq = postings.freq();
|
||||
count = 0;
|
||||
position = postings.nextPosition();
|
||||
count++;
|
||||
readPayload = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
position = -1;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
public int advance(int target) throws IOException {
|
||||
assert target > doc;
|
||||
doc = postings.advance(target);
|
||||
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
freq = postings.freq();
|
||||
assert freq >= 1;
|
||||
count = 0;
|
||||
}
|
||||
position = -1;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
if (count == freq) {
|
||||
assert position != NO_MORE_POSITIONS;
|
||||
return position = NO_MORE_POSITIONS;
|
||||
}
|
||||
int prevPosition = position;
|
||||
position = postings.nextPosition();
|
||||
assert position >= prevPosition : "prevPosition="+prevPosition+" > position="+position;
|
||||
assert position != NO_MORE_POSITIONS; // int endPosition not possible
|
||||
count++;
|
||||
readPayload = false;
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return position + 1;
|
||||
public int startPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return (position == -1) ? -1
|
||||
: (position != NO_MORE_POSITIONS) ? position + 1
|
||||
: NO_MORE_POSITIONS;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -105,7 +109,6 @@ public class TermSpans extends Spans {
|
|||
return postings.cost();
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
final BytesRef payload = postings.getPayload();
|
||||
|
@ -120,7 +123,6 @@ public class TermSpans extends Spans {
|
|||
return Collections.singletonList(bytes);
|
||||
}
|
||||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
return readPayload == false && postings.getPayload() != null;
|
||||
|
@ -129,55 +131,12 @@ public class TermSpans extends Spans {
|
|||
@Override
|
||||
public String toString() {
|
||||
return "spans(" + term.toString() + ")@" +
|
||||
(doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position);
|
||||
(doc == -1 ? "START" : (doc == NO_MORE_DOCS) ? "ENDDOC"
|
||||
: doc + " - " + (position == NO_MORE_POSITIONS ? "ENDPOS" : position));
|
||||
}
|
||||
|
||||
public PostingsEnum getPostings() {
|
||||
return postings;
|
||||
}
|
||||
|
||||
private static final class EmptyTermSpans extends TermSpans {
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
public static final TermSpans EMPTY_TERM_SPANS = new EmptyTermSpans();
|
||||
}
|
||||
|
|
|
@ -18,14 +18,18 @@
|
|||
/**
|
||||
* The calculus of spans.
|
||||
*
|
||||
* <p>A span is a <code>&lt;doc,startPosition,endPosition&gt;</code> tuple.</p>
|
||||
* <p>A span is a <code>&lt;doc,startPosition,endPosition&gt;</code> tuple that is enumerated by
|
||||
* class {@link org.apache.lucene.search.spans.Spans Spans}.
|
||||
* </p>
|
||||
*
|
||||
* <p>The following span query operators are implemented:
|
||||
*
|
||||
* <ul>
|
||||
*
|
||||
* <li>A {@link org.apache.lucene.search.spans.SpanTermQuery SpanTermQuery} matches all spans
|
||||
* containing a particular {@link org.apache.lucene.index.Term Term}.</li>
|
||||
* containing a particular {@link org.apache.lucene.index.Term Term}.
|
||||
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
|
||||
* </li>
|
||||
*
|
||||
* <li> A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} matches spans
|
||||
* which occur near one another, and can be used to implement things like
|
||||
|
|
|
@ -46,6 +46,13 @@ public final class Version {
|
|||
@Deprecated
|
||||
public static final Version LUCENE_5_1_0 = new Version(5, 1, 0);
|
||||
|
||||
/**
|
||||
* Match settings and bugs in Lucene's 5.2.0 release.
|
||||
* @deprecated Use latest
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Version LUCENE_5_2_0 = new Version(5, 2, 0);
|
||||
|
||||
/** Match settings and bugs in Lucene's 6.0 release.
|
||||
* <p>
|
||||
* Use this to get the latest &amp; greatest settings, bug
|
||||
|
|
|
@ -217,6 +217,9 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
|
|||
|
||||
public void testNoWaitClose() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
if (directory instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) directory).setPreventDoubleWrite(false);
|
||||
}
|
||||
Document doc = new Document();
|
||||
Field idField = newStringField("id", "", Field.Store.YES);
|
||||
doc.add(idField);
|
||||
|
@ -248,7 +251,6 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
|
|||
// stress out aborting them on close:
|
||||
((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3);
|
||||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
|
||||
try {
|
||||
writer.commit();
|
||||
|
@ -267,7 +269,8 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
|
|||
setOpenMode(OpenMode.APPEND).
|
||||
setMergePolicy(newLogMergePolicy(100)).
|
||||
// Force excessive merging:
|
||||
setMaxBufferedDocs(2)
|
||||
setMaxBufferedDocs(2).
|
||||
setCommitOnClose(false)
|
||||
);
|
||||
}
|
||||
writer.close();
|
||||
|
|
|
@ -172,7 +172,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
|||
try {
|
||||
i.seek(getFilePointer());
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestIndexWriterDeleteByQuery extends LuceneTestCase {
|
||||
|
||||
// LUCENE-6379
|
||||
public void testDeleteMatchAllDocsQuery() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||
Document doc = new Document();
|
||||
// Norms are disabled:
|
||||
doc.add(newStringField("field", "foo", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
DirectoryReader r = DirectoryReader.open(w, true);
|
||||
FieldInfo fi = MultiFields.getMergedFieldInfos(r).fieldInfo("field");
|
||||
assertNotNull(fi);
|
||||
assertFalse(fi.hasNorms());
|
||||
assertEquals(1, r.numDocs());
|
||||
assertEquals(1, r.maxDoc());
|
||||
|
||||
w.deleteDocuments(new MatchAllDocsQuery());
|
||||
DirectoryReader r2 = DirectoryReader.openIfChanged(r);
|
||||
r.close();
|
||||
|
||||
assertNotNull(r2);
|
||||
assertEquals(0, r2.numDocs());
|
||||
assertEquals(0, r2.maxDoc());
|
||||
|
||||
// Confirm the omitNorms bit is in fact no longer set:
|
||||
doc = new Document();
|
||||
// Norms are enabled this time (text field):
|
||||
doc.add(newTextField("field", "foo", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
DirectoryReader r3 = DirectoryReader.openIfChanged(r2);
|
||||
r2.close();
|
||||
assertNotNull(r3);
|
||||
assertEquals(1, r3.numDocs());
|
||||
assertEquals(1, r3.maxDoc());
|
||||
|
||||
// Make sure norms can come back to life for a field after deleting by MatchAllDocsQuery:
|
||||
fi = MultiFields.getMergedFieldInfos(r3).fieldInfo("field");
|
||||
assertNotNull(fi);
|
||||
assertTrue(fi.hasNorms());
|
||||
r3.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@ -238,18 +238,20 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println("\ngetPayloadSpans test");
|
||||
}
|
||||
Spans pspans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
while (pspans.next()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("doc " + pspans.doc() + ": span " + pspans.start()
|
||||
+ " to " + pspans.end());
|
||||
}
|
||||
Collection<byte[]> payloads = pspans.getPayload();
|
||||
sawZero |= pspans.start() == 0;
|
||||
for (byte[] bytes : payloads) {
|
||||
count++;
|
||||
Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
while (pspans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
|
||||
System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition()
|
||||
+ " to " + pspans.endPosition());
|
||||
}
|
||||
Collection<byte[]> payloads = pspans.getPayload();
|
||||
sawZero |= pspans.startPosition() == 0;
|
||||
for (byte[] bytes : payloads) {
|
||||
count++;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -257,20 +259,20 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
assertEquals(5, count);
|
||||
|
||||
// System.out.println("\ngetSpans test");
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
count = 0;
|
||||
sawZero = false;
|
||||
while (spans.next()) {
|
||||
count++;
|
||||
sawZero |= spans.start() == 0;
|
||||
// System.out.println(spans.doc() + " - " + spans.start() + " - " +
|
||||
// spans.end());
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
count++;
|
||||
sawZero |= spans.startPosition() == 0;
|
||||
// System.out.println(spans.doc() + " - " + spans.start() + " - " +
|
||||
// spans.end());
|
||||
}
|
||||
}
|
||||
assertEquals(4, count);
|
||||
assertTrue(sawZero);
|
||||
|
||||
// System.out.println("\nPayloadSpanUtil test");
|
||||
|
||||
sawZero = false;
|
||||
PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext());
|
||||
Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
|
||||
|
|
|
@ -160,7 +160,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
|
||||
}
|
||||
CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
/*float score = hits.score(0);
|
||||
for (int i =1; i < hits.length(); i++)
|
||||
|
@ -211,13 +211,15 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
}
|
||||
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
|
||||
CheckHits.checkExplanations(query, "field", searcher, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
//should be two matches per document
|
||||
int count = 0;
|
||||
//100 hits times 2 matches per hit, we should have 200 in count
|
||||
while (spans.next()) {
|
||||
count++;
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
assertTrue(count + " does not equal: " + 200, count == 200);
|
||||
}
|
||||
|
@ -253,13 +255,15 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
}
|
||||
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
|
||||
CheckHits.checkExplanations(query, "field", searcher, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
//should be two matches per document
|
||||
int count = 0;
|
||||
//100 hits times 2 matches per hit, we should have 200 in count
|
||||
while (spans.next()) {
|
||||
count++;
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.Map;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
|
@ -42,27 +41,32 @@ final class JustCompileSearchSpans {
|
|||
static final class JustCompileSpans extends Spans {
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
public int docID() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
public int nextDoc() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
public int advance(int target) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) {
|
||||
public int startPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
public int endPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
@ -103,6 +107,36 @@ final class JustCompileSearchSpans {
|
|||
|
||||
static final class JustCompilePayloadSpans extends Spans {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
|
@ -113,31 +147,6 @@ final class JustCompileSearchSpans {
|
|||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
|
@ -147,7 +156,7 @@ final class JustCompileSearchSpans {
|
|||
|
||||
static final class JustCompileSpanScorer extends SpanScorer {
|
||||
|
||||
protected JustCompileSpanScorer(Spans spans, Weight weight,
|
||||
protected JustCompileSpanScorer(Spans spans, SpanWeight weight,
|
||||
Similarity.SimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
}
|
||||
|
|
|
@ -18,19 +18,18 @@ package org.apache.lucene.search.spans;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -39,141 +38,20 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
* NOTE: This should be used for testing purposes only
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class MultiSpansWrapper extends Spans { // can't be package private due to payloads
|
||||
public class MultiSpansWrapper {
|
||||
|
||||
private SpanQuery query;
|
||||
private List<LeafReaderContext> leaves;
|
||||
private int leafOrd = 0;
|
||||
private Spans current;
|
||||
private Map<Term,TermContext> termContexts;
|
||||
private final int numLeaves;
|
||||
|
||||
private MultiSpansWrapper(List<LeafReaderContext> leaves, SpanQuery query, Map<Term,TermContext> termContexts) {
|
||||
this.query = query;
|
||||
this.leaves = leaves;
|
||||
this.numLeaves = leaves.size();
|
||||
this.termContexts = termContexts;
|
||||
}
|
||||
|
||||
public static Spans wrap(IndexReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
|
||||
public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException {
|
||||
LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing
|
||||
LeafReaderContext lrContext = lr.getContext();
|
||||
Query rewrittenQuery = spanQuery.rewrite(lr); // get the term contexts so getSpans can be called directly
|
||||
HashSet<Term> termSet = new HashSet<>();
|
||||
rewrittenQuery.extractTerms(termSet);
|
||||
Map<Term,TermContext> termContexts = new HashMap<>();
|
||||
TreeSet<Term> terms = new TreeSet<>();
|
||||
query.extractTerms(terms);
|
||||
for (Term term : terms) {
|
||||
termContexts.put(term, TermContext.build(topLevelReaderContext, term));
|
||||
for (Term term: termSet) {
|
||||
TermContext termContext = TermContext.build(lrContext, term);
|
||||
termContexts.put(term, termContext);
|
||||
}
|
||||
final List<LeafReaderContext> leaves = topLevelReaderContext.leaves();
|
||||
if(leaves.size() == 1) {
|
||||
final LeafReaderContext ctx = leaves.get(0);
|
||||
return query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
}
|
||||
return new MultiSpansWrapper(leaves, query, termContexts);
|
||||
Spans actSpans = spanQuery.getSpans(lrContext, new Bits.MatchAllBits(lr.numDocs()), termContexts);
|
||||
return actSpans;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (leafOrd >= numLeaves) {
|
||||
return false;
|
||||
}
|
||||
if (current == null) {
|
||||
final LeafReaderContext ctx = leaves.get(leafOrd);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
}
|
||||
while(true) {
|
||||
if (current.next()) {
|
||||
return true;
|
||||
}
|
||||
if (++leafOrd < numLeaves) {
|
||||
final LeafReaderContext ctx = leaves.get(leafOrd);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
} else {
|
||||
current = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (leafOrd >= numLeaves) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int subIndex = ReaderUtil.subIndex(target, leaves);
|
||||
assert subIndex >= leafOrd;
|
||||
if (subIndex != leafOrd) {
|
||||
final LeafReaderContext ctx = leaves.get(subIndex);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
leafOrd = subIndex;
|
||||
} else if (current == null) {
|
||||
final LeafReaderContext ctx = leaves.get(leafOrd);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
}
|
||||
while (true) {
|
||||
if (target < leaves.get(leafOrd).docBase) {
|
||||
// target was in the previous slice
|
||||
if (current.next()) {
|
||||
return true;
|
||||
}
|
||||
} else if (current.skipTo(target - leaves.get(leafOrd).docBase)) {
|
||||
return true;
|
||||
}
|
||||
if (++leafOrd < numLeaves) {
|
||||
final LeafReaderContext ctx = leaves.get(leafOrd);
|
||||
current = query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts);
|
||||
} else {
|
||||
current = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doc() {
|
||||
if (current == null) {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
return current.doc() + leaves.get(leafOrd).docBase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
if (current == null) {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
return current.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
if (current == null) {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
return current.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
if (current == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return current.getPayload();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
if (current == null) {
|
||||
return false;
|
||||
}
|
||||
return current.isPayloadAvailable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return Integer.MAX_VALUE; // just for tests
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -651,47 +651,6 @@ public class TestBasics extends LuceneTestCase {
|
|||
1746, 1747, 1756, 1757, 1766, 1767, 1776, 1777, 1786, 1787, 1796, 1797});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansSkipTo() throws Exception {
|
||||
SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy"));
|
||||
SpanTermQuery t2 = new SpanTermQuery(new Term("field", "seventy"));
|
||||
Spans s1 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t1);
|
||||
Spans s2 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t2);
|
||||
|
||||
assertTrue(s1.next());
|
||||
assertTrue(s2.next());
|
||||
|
||||
boolean hasMore = true;
|
||||
|
||||
do {
|
||||
hasMore = skipToAccordingToJavaDocs(s1, s1.doc() + 1);
|
||||
assertEquals(hasMore, s2.skipTo(s2.doc() + 1));
|
||||
assertEquals(s1.doc(), s2.doc());
|
||||
} while (hasMore);
|
||||
}
|
||||
|
||||
/** Skips to the first match beyond the current, whose document number is
|
||||
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
|
||||
* a match. <p>Behaves as if written: <pre>
|
||||
* boolean skipTo(int target) {
|
||||
* do {
|
||||
* if (!next())
|
||||
* return false;
|
||||
* } while (target > doc());
|
||||
* return true;
|
||||
* }
|
||||
* </pre>
|
||||
*/
|
||||
private boolean skipToAccordingToJavaDocs(Spans s, int target)
|
||||
throws Exception {
|
||||
do {
|
||||
if (!s.next())
|
||||
return false;
|
||||
} while (target > s.doc());
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
private void checkHits(Query query, int[] results) throws IOException {
|
||||
CheckHits.checkHits(random(), query, "field", searcher, results);
|
||||
}
|
||||
|
|
|
@ -259,36 +259,18 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
|||
SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender"));
|
||||
check(q, new int[] { 0, 1, 2, 3, 4 });
|
||||
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(0,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(1,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(1,1,2), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,1,2), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,2,3), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(3,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(4,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(4,1,2), s(span));
|
||||
|
||||
assertEquals(false, span.next());
|
||||
TestSpans.tstNextSpans(span, 0,0,1);
|
||||
TestSpans.tstNextSpans(span, 1,0,1);
|
||||
TestSpans.tstNextSpans(span, 1,1,2);
|
||||
TestSpans.tstNextSpans(span, 2,0,1);
|
||||
TestSpans.tstNextSpans(span, 2,1,2);
|
||||
TestSpans.tstNextSpans(span, 2,2,3);
|
||||
TestSpans.tstNextSpans(span, 3,0,1);
|
||||
TestSpans.tstNextSpans(span, 4,0,1);
|
||||
TestSpans.tstNextSpans(span, 4,1,2);
|
||||
TestSpans.tstEndSpans(span);
|
||||
}
|
||||
|
||||
public void testSpans1() throws Exception {
|
||||
|
@ -300,15 +282,18 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
|||
check(qA, new int[] { 0, 1, 2, 4 });
|
||||
check(qB, new int[] { 0, 1, 2, 4 });
|
||||
|
||||
Spans spanA = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qA);
|
||||
Spans spanB = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qB);
|
||||
Spans spanA = MultiSpansWrapper.wrap(searcher.getIndexReader(), qA);
|
||||
Spans spanB = MultiSpansWrapper.wrap(searcher.getIndexReader(), qB);
|
||||
|
||||
while (spanA.next()) {
|
||||
assertTrue("spanB not still going", spanB.next());
|
||||
assertEquals("spanA not equal spanB", s(spanA), s(spanB));
|
||||
while (spanA.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
assertNotSame("spanB not still going", Spans.NO_MORE_DOCS, spanB.nextDoc());
|
||||
while (spanA.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
assertEquals("spanB start position", spanA.startPosition(), spanB.nextStartPosition());
|
||||
assertEquals("spanB end position", spanA.endPosition(), spanB.endPosition());
|
||||
}
|
||||
assertEquals("spanB start position", Spans.NO_MORE_POSITIONS, spanB.nextStartPosition());
|
||||
}
|
||||
assertTrue("spanB still going even tough spanA is done", !(spanB.next()));
|
||||
|
||||
assertEquals("spanB end doc", Spans.NO_MORE_DOCS, spanB.nextDoc());
|
||||
}
|
||||
|
||||
public void testSpans2() throws Exception {
|
||||
|
@ -323,29 +308,16 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
|
|||
new FieldMaskingSpanQuery(qB, "id") }, -1, false );
|
||||
check(q, new int[] { 0, 1, 2, 3 });
|
||||
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(0,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(1,1,2), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,0,1), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(2,2,3), s(span));
|
||||
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(3,0,1), s(span));
|
||||
|
||||
assertEquals(false, span.next());
|
||||
TestSpans.tstNextSpans(span, 0,0,1);
|
||||
TestSpans.tstNextSpans(span, 1,1,2);
|
||||
TestSpans.tstNextSpans(span, 2,0,1);
|
||||
TestSpans.tstNextSpans(span, 2,2,3);
|
||||
TestSpans.tstNextSpans(span, 3,0,1);
|
||||
TestSpans.tstEndSpans(span);
|
||||
}
|
||||
|
||||
public String s(Spans span) {
|
||||
return s(span.doc(), span.start(), span.end());
|
||||
}
|
||||
public String s(int doc, int start, int end) {
|
||||
return "s(" + doc + "," + start + "," + end +")";
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ public class TestNearSpansOrdered extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public String s(Spans span) {
|
||||
return s(span.doc(), span.start(), span.end());
|
||||
return s(span.docID(), span.startPosition(), span.endPosition());
|
||||
}
|
||||
public String s(int doc, int start, int end) {
|
||||
return "s(" + doc + "," + start + "," + end +")";
|
||||
|
@ -114,12 +114,10 @@ public class TestNearSpansOrdered extends LuceneTestCase {
|
|||
|
||||
public void testNearSpansNext() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
assertEquals(true, span.next());
|
||||
assertEquals(s(1,0,4), s(span));
|
||||
assertEquals(false, span.next());
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
TestSpans.tstNextSpans(span,0,0,3);
|
||||
TestSpans.tstNextSpans(span,1,0,4);
|
||||
TestSpans.tstEndSpans(span);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -127,51 +125,58 @@ public class TestNearSpansOrdered extends LuceneTestCase {
|
|||
* same as next -- it's only applicable in this case since we know doc
|
||||
* does not contain more than one span
|
||||
*/
|
||||
public void testNearSpansSkipToLikeNext() throws Exception {
|
||||
public void testNearSpansAdvanceLikeNext() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.skipTo(0));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertEquals(0, span.advance(0));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
assertEquals(true, span.skipTo(1));
|
||||
assertEquals(1, span.advance(1));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(1,0,4), s(span));
|
||||
assertEquals(false, span.skipTo(2));
|
||||
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
|
||||
}
|
||||
|
||||
public void testNearSpansNextThenSkipTo() throws Exception {
|
||||
public void testNearSpansNextThenAdvance() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.next());
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc());
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
assertEquals(true, span.skipTo(1));
|
||||
assertNotSame(Spans.NO_MORE_DOCS, span.advance(1));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(1,0,4), s(span));
|
||||
assertEquals(false, span.next());
|
||||
assertEquals(Spans.NO_MORE_DOCS, span.nextDoc());
|
||||
}
|
||||
|
||||
public void testNearSpansNextThenSkipPast() throws Exception {
|
||||
public void testNearSpansNextThenAdvancePast() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.next());
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc());
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
assertEquals(false, span.skipTo(2));
|
||||
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
|
||||
}
|
||||
|
||||
public void testNearSpansSkipPast() throws Exception {
|
||||
public void testNearSpansAdvancePast() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(false, span.skipTo(2));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertEquals(Spans.NO_MORE_DOCS, span.advance(2));
|
||||
}
|
||||
|
||||
public void testNearSpansSkipTo0() throws Exception {
|
||||
public void testNearSpansAdvanceTo0() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.skipTo(0));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertEquals(0, span.advance(0));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(0,0,3), s(span));
|
||||
}
|
||||
|
||||
public void testNearSpansSkipTo1() throws Exception {
|
||||
public void testNearSpansAdvanceTo1() throws Exception {
|
||||
SpanNearQuery q = makeQuery();
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);
|
||||
assertEquals(true, span.skipTo(1));
|
||||
Spans span = MultiSpansWrapper.wrap(searcher.getIndexReader(), q);
|
||||
assertEquals(1, span.advance(1));
|
||||
assertEquals(0, span.nextStartPosition());
|
||||
assertEquals(s(1,0,4), s(span));
|
||||
}
|
||||
|
||||
|
|
|
@ -67,12 +67,12 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq;
|
||||
Spans spans;
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
|
||||
spans = MultiSpansWrapper.wrap(indexReader.getContext(), stq);
|
||||
spans = MultiSpansWrapper.wrap(indexReader, stq);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 100, 1, 1, 1);
|
||||
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
|
||||
spans = MultiSpansWrapper.wrap(indexReader.getContext(), stq);
|
||||
spans = MultiSpansWrapper.wrap(indexReader, stq);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 100, 0, 0, 0);
|
||||
}
|
||||
|
@ -83,7 +83,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanFirstQuery sfq;
|
||||
match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
Spans spans = MultiSpansWrapper.wrap(indexReader.getContext(), sfq);
|
||||
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq);
|
||||
checkSpans(spans, 109, 1, 1, 1);
|
||||
//Test more complicated subclause
|
||||
SpanQuery[] clauses = new SpanQuery[2];
|
||||
|
@ -91,11 +91,11 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
|
||||
match = new SpanNearQuery(clauses, 0, true);
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader.getContext(), sfq), 100, 2, 1, 1);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
|
||||
|
||||
match = new SpanNearQuery(clauses, 0, false);
|
||||
sfq = new SpanFirstQuery(match, 2);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader.getContext(), sfq), 100, 2, 1, 1);
|
||||
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq), 100, 2, 1, 1);
|
||||
|
||||
}
|
||||
|
||||
|
@ -119,7 +119,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
|
||||
checkSpans(MultiSpansWrapper.wrap(reader.getContext(), snq), 1,new int[]{2});
|
||||
checkSpans(MultiSpansWrapper.wrap(reader, snq), 1,new int[]{2});
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
@ -129,10 +129,8 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
Spans spans;
|
||||
IndexSearcher searcher = getSearcher();
|
||||
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), stq);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 0, null);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq);
|
||||
assertNull(spans);
|
||||
|
||||
SpanQuery[] clauses = new SpanQuery[3];
|
||||
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
|
||||
|
@ -140,7 +138,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
|
||||
SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{3,3});
|
||||
|
||||
|
@ -151,7 +149,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
spanNearQuery = new SpanNearQuery(clauses, 6, true);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery);
|
||||
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 1, new int[]{3});
|
||||
|
@ -174,7 +172,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
// yy within 6 of xx within 6 of rr
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{3,3});
|
||||
closeIndexReader.close();
|
||||
|
@ -205,7 +203,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
clauses3[1] = snq;
|
||||
|
||||
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 1, new int[]{3});
|
||||
|
@ -243,7 +241,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
|
||||
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery);
|
||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
||||
checkSpans(spans, 2, new int[]{8, 8});
|
||||
closeIndexReader.close();
|
||||
|
@ -267,16 +265,18 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.next()) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -303,15 +303,18 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.next()) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
for (final byte[] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -338,16 +341,18 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
|
||||
SpanQuery[] sqs = { stq1, stq2 };
|
||||
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq);
|
||||
|
||||
TopDocs topDocs = is.search(snq, 1);
|
||||
Set<String> payloadSet = new HashSet<>();
|
||||
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
|
||||
while (spans.next()) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
Collection<byte[]> payloads = spans.getPayload();
|
||||
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
for (final byte [] payload : payloads) {
|
||||
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -395,31 +400,22 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
//each position match should have a span associated with it, since there is just one underlying term query, there should
|
||||
//only be one entry in the span
|
||||
int seen = 0;
|
||||
while (spans.next() == true)
|
||||
{
|
||||
//if we expect payloads, then isPayloadAvailable should be true
|
||||
if (expectedNumPayloads > 0) {
|
||||
assertTrue("isPayloadAvailable is not returning the correct value: " + spans.isPayloadAvailable()
|
||||
+ " and it should be: " + (expectedNumPayloads > 0),
|
||||
spans.isPayloadAvailable() == true);
|
||||
} else {
|
||||
assertTrue("isPayloadAvailable should be false", spans.isPayloadAvailable() == false);
|
||||
}
|
||||
//See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
assertTrue("payload Size: " + payload.size() + " is not: " + expectedNumPayloads, payload.size() == expectedNumPayloads);
|
||||
for (final byte [] thePayload : payload) {
|
||||
assertTrue("payload[0] Size: " + thePayload.length + " is not: " + expectedPayloadLength,
|
||||
thePayload.length == expectedPayloadLength);
|
||||
assertTrue(thePayload[0] + " does not equal: " + expectedFirstByte, thePayload[0] == expectedFirstByte);
|
||||
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
assertEquals("isPayloadAvailable should return true/false as payloads are expected", expectedNumPayloads > 0, spans.isPayloadAvailable());
|
||||
//See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
assertEquals("payload size", expectedNumPayloads, payload.size());
|
||||
for (final byte [] thePayload : payload) {
|
||||
assertEquals("payload length", expectedPayloadLength, thePayload.length);
|
||||
assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
|
||||
}
|
||||
}
|
||||
|
||||
seen++;
|
||||
}
|
||||
seen++;
|
||||
}
|
||||
assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans);
|
||||
assertEquals("expectedNumSpans", expectedNumSpans, seen);
|
||||
}
|
||||
|
||||
private IndexSearcher getSearcher() throws Exception {
|
||||
|
@ -446,27 +442,28 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
|
||||
int cnt = 0;
|
||||
|
||||
while (spans.next() == true) {
|
||||
if(VERBOSE)
|
||||
System.out.println("\nSpans Dump --");
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
if(VERBOSE) {
|
||||
System.out.println("payloads for span:" + payload.size());
|
||||
for (final byte [] bytes : payload) {
|
||||
System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
|
||||
+ new String(bytes, StandardCharsets.UTF_8));
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
if(VERBOSE)
|
||||
System.out.println("\nSpans Dump --");
|
||||
if (spans.isPayloadAvailable()) {
|
||||
Collection<byte[]> payload = spans.getPayload();
|
||||
if(VERBOSE) {
|
||||
System.out.println("payloads for span:" + payload.size());
|
||||
for (final byte [] bytes : payload) {
|
||||
System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
|
||||
+ new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
assertEquals("payload size", numPayloads[cnt], payload.size());
|
||||
} else { // no payload available
|
||||
assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0", numPayloads.length > 0 && numPayloads[cnt] > 0 );
|
||||
}
|
||||
|
||||
assertEquals(numPayloads[cnt],payload.size());
|
||||
} else {
|
||||
assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0",numPayloads.length > 0 && numPayloads[cnt] > 0 );
|
||||
cnt++;
|
||||
}
|
||||
cnt++;
|
||||
}
|
||||
|
||||
assertEquals(numSpans, cnt);
|
||||
assertEquals("expected numSpans", numSpans, cnt);
|
||||
}
|
||||
|
||||
final class PayloadAnalyzer extends Analyzer {
|
||||
|
|
|
@ -22,7 +22,6 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -201,117 +200,55 @@ public class TestSpans extends LuceneTestCase {
|
|||
makeSpanTermQuery("t3") },
|
||||
slop,
|
||||
ordered);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), snq);
|
||||
|
||||
assertTrue("first range", spans.next());
|
||||
assertEquals("first doc", 11, spans.doc());
|
||||
assertEquals("first start", 0, spans.start());
|
||||
assertEquals("first end", 4, spans.end());
|
||||
assertEquals("first doc", 11, spans.nextDoc());
|
||||
assertEquals("first start", 0, spans.nextStartPosition());
|
||||
assertEquals("first end", 4, spans.endPosition());
|
||||
|
||||
assertTrue("second range", spans.next());
|
||||
assertEquals("second doc", 11, spans.doc());
|
||||
assertEquals("second start", 2, spans.start());
|
||||
assertEquals("second end", 6, spans.end());
|
||||
assertEquals("second start", 2, spans.nextStartPosition());
|
||||
assertEquals("second end", 6, spans.endPosition());
|
||||
|
||||
assertFalse("third range", spans.next());
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
|
||||
public void testSpanNearUnOrdered() throws Exception {
|
||||
|
||||
//See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test
|
||||
SpanNearQuery snq;
|
||||
snq = new SpanNearQuery(
|
||||
SpanNearQuery senq;
|
||||
senq = new SpanNearQuery(
|
||||
new SpanQuery[] {
|
||||
makeSpanTermQuery("u1"),
|
||||
makeSpanTermQuery("u2") },
|
||||
0,
|
||||
false);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 4, spans.doc());
|
||||
assertEquals("start", 1, spans.start());
|
||||
assertEquals("end", 3, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 5, spans.doc());
|
||||
assertEquals("start", 2, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 8, spans.doc());
|
||||
assertEquals("start", 2, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 9, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 2, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 10, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 2, spans.end());
|
||||
assertTrue("Has next and it shouldn't: " + spans.doc(), spans.next() == false);
|
||||
Spans spans = MultiSpansWrapper.wrap(reader, senq);
|
||||
tstNextSpans(spans, 4, 1, 3);
|
||||
tstNextSpans(spans, 5, 2, 4);
|
||||
tstNextSpans(spans, 8, 2, 4);
|
||||
tstNextSpans(spans, 9, 0, 2);
|
||||
tstNextSpans(spans, 10, 0, 2);
|
||||
tstEndSpans(spans);
|
||||
|
||||
SpanNearQuery u1u2 = new SpanNearQuery(new SpanQuery[]{makeSpanTermQuery("u1"),
|
||||
makeSpanTermQuery("u2")}, 0, false);
|
||||
snq = new SpanNearQuery(
|
||||
senq = new SpanNearQuery(
|
||||
new SpanQuery[] {
|
||||
u1u2,
|
||||
makeSpanTermQuery("u2")
|
||||
},
|
||||
1,
|
||||
false);
|
||||
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 4, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 3, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
//unordered spans can be subsets
|
||||
assertEquals("doc", 4, spans.doc());
|
||||
assertEquals("start", 1, spans.start());
|
||||
assertEquals("end", 3, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 5, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 5, spans.doc());
|
||||
assertEquals("start", 2, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 8, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 8, spans.doc());
|
||||
assertEquals("start", 2, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 9, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 2, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 9, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 4, spans.end());
|
||||
|
||||
assertTrue("Does not have next and it should", spans.next());
|
||||
assertEquals("doc", 10, spans.doc());
|
||||
assertEquals("start", 0, spans.start());
|
||||
assertEquals("end", 2, spans.end());
|
||||
|
||||
assertTrue("Has next and it shouldn't", spans.next() == false);
|
||||
spans = MultiSpansWrapper.wrap(reader, senq);
|
||||
tstNextSpans(spans, 4, 0, 3);
|
||||
tstNextSpans(spans, 4, 1, 3); // unordered spans can be subsets
|
||||
tstNextSpans(spans, 5, 0, 4);
|
||||
tstNextSpans(spans, 5, 2, 4);
|
||||
tstNextSpans(spans, 8, 0, 4);
|
||||
tstNextSpans(spans, 8, 2, 4);
|
||||
tstNextSpans(spans, 9, 0, 2);
|
||||
tstNextSpans(spans, 9, 0, 4);
|
||||
tstNextSpans(spans, 10, 0, 2);
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
|
||||
|
@ -321,20 +258,39 @@ public class TestSpans extends LuceneTestCase {
|
|||
for (int i = 0; i < terms.length; i++) {
|
||||
sqa[i] = makeSpanTermQuery(terms[i]);
|
||||
}
|
||||
return MultiSpansWrapper.wrap(searcher.getTopReaderContext(), new SpanOrQuery(sqa));
|
||||
return MultiSpansWrapper.wrap(searcher.getIndexReader(), new SpanOrQuery(sqa));
|
||||
}
|
||||
|
||||
private void tstNextSpans(Spans spans, int doc, int start, int end)
|
||||
throws Exception {
|
||||
assertTrue("next", spans.next());
|
||||
assertEquals("doc", doc, spans.doc());
|
||||
assertEquals("start", start, spans.start());
|
||||
assertEquals("end", end, spans.end());
|
||||
public static void tstNextSpans(Spans spans, int doc, int start, int end) throws IOException {
|
||||
if (spans.docID() >= doc) {
|
||||
assertEquals("docId", doc, spans.docID());
|
||||
} else { // nextDoc needed before testing start/end
|
||||
if (spans.docID() >= 0) {
|
||||
assertEquals("nextStartPosition of previous doc", Spans.NO_MORE_POSITIONS, spans.nextStartPosition());
|
||||
assertEquals("endPosition of previous doc", Spans.NO_MORE_POSITIONS, spans.endPosition());
|
||||
}
|
||||
assertEquals("nextDoc", doc, spans.nextDoc());
|
||||
if (doc != Spans.NO_MORE_DOCS) {
|
||||
assertEquals("first startPosition", -1, spans.startPosition());
|
||||
assertEquals("first endPosition", -1, spans.endPosition());
|
||||
}
|
||||
}
|
||||
if (doc != Spans.NO_MORE_DOCS) {
|
||||
assertEquals("nextStartPosition", start, spans.nextStartPosition());
|
||||
assertEquals("startPosition", start, spans.startPosition());
|
||||
assertEquals("endPosition", end, spans.endPosition());
|
||||
}
|
||||
}
|
||||
|
||||
public static void tstEndSpans(Spans spans) throws Exception {
|
||||
if (spans != null) { // null Spans is empty
|
||||
tstNextSpans(spans, Spans.NO_MORE_DOCS, -2, -2); // start and end positions will be ignored
|
||||
}
|
||||
}
|
||||
|
||||
public void testSpanOrEmpty() throws Exception {
|
||||
Spans spans = orSpans(new String[0]);
|
||||
assertFalse("empty next", spans.next());
|
||||
tstEndSpans(spans);
|
||||
|
||||
SpanOrQuery a = new SpanOrQuery();
|
||||
SpanOrQuery b = new SpanOrQuery();
|
||||
|
@ -344,24 +300,7 @@ public class TestSpans extends LuceneTestCase {
|
|||
public void testSpanOrSingle() throws Exception {
|
||||
Spans spans = orSpans(new String[] {"w5"});
|
||||
tstNextSpans(spans, 0, 4, 5);
|
||||
assertFalse("final next", spans.next());
|
||||
}
|
||||
|
||||
public void testSpanOrMovesForward() throws Exception {
|
||||
Spans spans = orSpans(new String[] {"w1", "xx"});
|
||||
|
||||
spans.next();
|
||||
int doc = spans.doc();
|
||||
assertEquals(0, doc);
|
||||
|
||||
spans.skipTo(0);
|
||||
doc = spans.doc();
|
||||
|
||||
// LUCENE-1583:
|
||||
// according to Spans, a skipTo to the same doc or less
|
||||
// should still call next() on the underlying Spans
|
||||
assertEquals(1, doc);
|
||||
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
public void testSpanOrDouble() throws Exception {
|
||||
|
@ -370,17 +309,15 @@ public class TestSpans extends LuceneTestCase {
|
|||
tstNextSpans(spans, 2, 3, 4);
|
||||
tstNextSpans(spans, 3, 4, 5);
|
||||
tstNextSpans(spans, 7, 3, 4);
|
||||
assertFalse("final next", spans.next());
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
public void testSpanOrDoubleSkip() throws Exception {
|
||||
public void testSpanOrDoubleAdvance() throws Exception {
|
||||
Spans spans = orSpans(new String[] {"w5", "yy"});
|
||||
assertTrue("initial skipTo", spans.skipTo(3));
|
||||
assertEquals("doc", 3, spans.doc());
|
||||
assertEquals("start", 4, spans.start());
|
||||
assertEquals("end", 5, spans.end());
|
||||
assertEquals("initial advance", 3, spans.advance(3));
|
||||
tstNextSpans(spans, 3, 4, 5);
|
||||
tstNextSpans(spans, 7, 3, 4);
|
||||
assertFalse("final next", spans.next());
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
public void testSpanOrUnused() throws Exception {
|
||||
|
@ -389,7 +326,7 @@ public class TestSpans extends LuceneTestCase {
|
|||
tstNextSpans(spans, 2, 3, 4);
|
||||
tstNextSpans(spans, 3, 4, 5);
|
||||
tstNextSpans(spans, 7, 3, 4);
|
||||
assertFalse("final next", spans.next());
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
public void testSpanOrTripleSameDoc() throws Exception {
|
||||
|
@ -400,7 +337,7 @@ public class TestSpans extends LuceneTestCase {
|
|||
tstNextSpans(spans, 11, 3, 4);
|
||||
tstNextSpans(spans, 11, 4, 5);
|
||||
tstNextSpans(spans, 11, 5, 6);
|
||||
assertFalse("final next", spans.next());
|
||||
tstEndSpans(spans);
|
||||
}
|
||||
|
||||
public void testSpanScorerZeroSloppyFreq() throws Exception {
|
||||
|
@ -439,8 +376,8 @@ public class TestSpans extends LuceneTestCase {
|
|||
assertEquals("first doc number", spanScorer.docID() + ctx.docBase, 11);
|
||||
float score = spanScorer.score();
|
||||
assertTrue("first doc score should be zero, " + score, score == 0.0f);
|
||||
} else {
|
||||
assertTrue("no second doc", spanScorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
} else {
|
||||
assertTrue("no second doc", spanScorer == null || spanScorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -542,11 +479,15 @@ public class TestSpans extends LuceneTestCase {
|
|||
SpanTermQuery iq = new SpanTermQuery(new Term(field, include));
|
||||
SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude));
|
||||
SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), snq);
|
||||
|
||||
int i = 0;
|
||||
while (spans.next()){
|
||||
i++;
|
||||
if (spans != null) {
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS){
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,187 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.CheckHits;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.English;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests Spans (v2)
|
||||
*
|
||||
*/
|
||||
public class TestSpansEnum extends LuceneTestCase {
|
||||
private static IndexSearcher searcher;
|
||||
private static IndexReader reader;
|
||||
private static Directory directory;
|
||||
|
||||
static final class SimplePayloadFilter extends TokenFilter {
|
||||
int pos;
|
||||
final PayloadAttribute payloadAttr;
|
||||
final CharTermAttribute termAttr;
|
||||
|
||||
public SimplePayloadFilter(TokenStream input) {
|
||||
super(input);
|
||||
pos = 0;
|
||||
payloadAttr = input.addAttribute(PayloadAttribute.class);
|
||||
termAttr = input.addAttribute(CharTermAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes(StandardCharsets.UTF_8)));
|
||||
pos++;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static Analyzer simplePayloadAnalyzer;
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
simplePayloadAnalyzer = new Analyzer() {
|
||||
@Override
|
||||
public TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
|
||||
return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
|
||||
}
|
||||
};
|
||||
|
||||
directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
|
||||
newIndexWriterConfig(simplePayloadAnalyzer)
|
||||
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
|
||||
//writer.infoStream = System.out;
|
||||
for (int i = 0; i < 10; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
for (int i = 100; i < 110; i++) {
|
||||
Document doc = new Document(); // doc id 10-19 have 100-109
|
||||
doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
reader = writer.getReader();
|
||||
searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
reader.close();
|
||||
directory.close();
|
||||
searcher = null;
|
||||
reader = null;
|
||||
directory = null;
|
||||
simplePayloadAnalyzer = null;
|
||||
}
|
||||
|
||||
private void checkHits(Query query, int[] results) throws IOException {
|
||||
CheckHits.checkHits(random(), query, "field", searcher, results);
|
||||
}
|
||||
|
||||
SpanTermQuery spanTQ(String term) {
|
||||
return new SpanTermQuery(new Term("field", term));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansEnumOr1() throws Exception {
|
||||
SpanTermQuery t1 = spanTQ("one");
|
||||
SpanTermQuery t2 = spanTQ("two");
|
||||
SpanOrQuery soq = new SpanOrQuery(t1, t2);
|
||||
checkHits(soq, new int[] {1, 2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansEnumOr2() throws Exception {
|
||||
SpanTermQuery t1 = spanTQ("one");
|
||||
SpanTermQuery t11 = spanTQ("eleven");
|
||||
SpanOrQuery soq = new SpanOrQuery(t1, t11);
|
||||
checkHits(soq, new int[] {1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansEnumOr3() throws Exception {
|
||||
SpanTermQuery t12 = spanTQ("twelve");
|
||||
SpanTermQuery t11 = spanTQ("eleven");
|
||||
SpanOrQuery soq = new SpanOrQuery(t12, t11);
|
||||
checkHits(soq, new int[] {});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansEnumOrNot1() throws Exception {
|
||||
SpanTermQuery t1 = spanTQ("one");
|
||||
SpanTermQuery t2 = spanTQ("two");
|
||||
SpanOrQuery soq = new SpanOrQuery(t1, t2);
|
||||
SpanNotQuery snq = new SpanNotQuery(soq, t1);
|
||||
checkHits(snq, new int[] {2,12});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansEnumNotBeforeAfter1() throws Exception {
|
||||
SpanTermQuery t1 = spanTQ("one");
|
||||
SpanTermQuery t100 = spanTQ("hundred");
|
||||
SpanNotQuery snq = new SpanNotQuery(t100, t1, 0, 0);
|
||||
checkHits(snq, new int[] {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}); // include all "one hundred ..."
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansEnumNotBeforeAfter2() throws Exception {
|
||||
SpanTermQuery t1 = spanTQ("one");
|
||||
SpanTermQuery t100 = spanTQ("hundred");
|
||||
SpanNotQuery snq = new SpanNotQuery(t100, t1, 1, 0);
|
||||
checkHits(snq, new int[] {}); // exclude all "one hundred ..."
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpansEnumNotBeforeAfter3() throws Exception {
|
||||
SpanTermQuery t1 = spanTQ("one");
|
||||
SpanTermQuery t100 = spanTQ("hundred");
|
||||
SpanNotQuery snq = new SpanNotQuery(t100, t1, 0, 1);
|
||||
checkHits(snq, new int[] {10, 12, 13, 14, 15, 16, 17, 18, 19}); // exclude "one hundred one"
|
||||
}
|
||||
}
|
|
@ -215,7 +215,7 @@ public class RandomSamplingFacetsCollector extends FacetsCollector {
|
|||
|
||||
return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -308,10 +308,11 @@ public class WeightedSpanTermExtractor {
|
|||
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
|
||||
|
||||
// collect span positions
|
||||
while (spans.next()) {
|
||||
spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
|
||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
|
||||
spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (spanPositions.size() == 0) {
|
||||
|
|
|
@ -681,7 +681,7 @@ public class TestMultiTermHighlighting extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
|
||||
Query query = new SpanNearQuery(new SpanQuery[] { childQuery }, 0, true);
|
||||
Query query = new SpanNearQuery(new SpanQuery[] { childQuery, childQuery }, 0, false);
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits);
|
||||
String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
|
||||
|
|
|
@ -895,7 +895,7 @@ public class DocTermOrds implements Accountable {
|
|||
try {
|
||||
return getOrdTermsEnum(reader);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -145,7 +145,7 @@ public class DocumentValueSourceDictionary extends DocumentDictionary {
|
|||
try {
|
||||
currentWeightValues = weightsValueSource.getValues(new HashMap<String, Object>(), leaves.get(currentLeafIndex));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return currentWeightValues.longVal(docId - starts[subIndex]);
|
||||
|
|
|
@ -113,7 +113,7 @@ public class FileDictionary implements Dictionary {
|
|||
try {
|
||||
return new FileIterator();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2681,7 +2681,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
}
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -58,6 +58,9 @@ Other Changes
|
|||
|
||||
* SOLR-6954: Deprecated SolrClient.shutdown() method removed (Alan Woodward)
|
||||
|
||||
================== 5.2.0 ==================
|
||||
(No Changes)
|
||||
|
||||
================== 5.1.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
||||
|
@ -91,15 +94,6 @@ Upgrading from Solr 5.0
|
|||
contain some LGPL-only code. Until that's resolved by Tika, you can download the
|
||||
.jar yourself and place it under contrib/extraction/lib.
|
||||
|
||||
* A Twitter engineer discovered a JVM bug that causes GC pause problems. The
|
||||
workaround for those problems makes certain functionality impossible, such as
|
||||
running the jstat program on your Solr instance. That workaround has been
|
||||
implemented in the bin/solr start scripts. If you need the missing java
|
||||
functionality, delete the "-XX:+PerfDisableSharedMem" parameter from
|
||||
bin/solr.in.sh or bin/solr.in.cmd.
|
||||
|
||||
http://www.evanjones.ca/jvm-mmap-pause.html
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
|
@ -350,11 +344,8 @@ Bug Fixes
|
|||
* SOLR-7309: Make bin/solr, bin/post work when Solr installation directory contains spaces
|
||||
(Ramkumar Aiyengar, Martijn Koster)
|
||||
|
||||
* SOLR-7319: Workaround for the "Four Month Bug" GC pause problem discovered
|
||||
by a Twitter software engineer. This causes GC pauses when JVM statistics
|
||||
are left enabled and there is heavy MMAP write activity.
|
||||
http://www.evanjones.ca/jvm-mmap-pause.html
|
||||
(Shawn Heisey)
|
||||
* SOLR-6924: The config API forcefully refreshes all replicas in the collection to ensure all are
|
||||
updated (Noble Paul)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
@ -368,6 +359,9 @@ Optimizations
|
|||
* SOLR-7239: improved performance of min & max in StatsComponent, as well as situations
|
||||
where local params disable all stats (hossman)
|
||||
|
||||
* SOLR-7324: IndexFetcher does not need to call isIndexStale if full copy is already needed
|
||||
(Stephan Lagraulet via Varun Thacker)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -40,7 +40,6 @@ set GC_TUNE=-XX:NewRatio=3 ^
|
|||
-XX:+UseCMSInitiatingOccupancyOnly ^
|
||||
-XX:CMSInitiatingOccupancyFraction=50 ^
|
||||
-XX:CMSMaxAbortablePrecleanTime=6000 ^
|
||||
-XX:+PerfDisableSharedMem ^
|
||||
-XX:+CMSParallelRemarkEnabled ^
|
||||
-XX:+ParallelRefProcEnabled
|
||||
|
||||
|
|
|
@ -37,7 +37,6 @@ GC_TUNE="-XX:NewRatio=3 \
|
|||
-XX:PretenureSizeThreshold=64m \
|
||||
-XX:+UseCMSInitiatingOccupancyOnly \
|
||||
-XX:CMSInitiatingOccupancyFraction=50 \
|
||||
-XX:+PerfDisableSharedMem \
|
||||
-XX:CMSMaxAbortablePrecleanTime=6000 \
|
||||
-XX:+CMSParallelRemarkEnabled \
|
||||
-XX:+ParallelRefProcEnabled"
|
||||
|
|
|
@ -103,7 +103,7 @@ public class RegexRulesPasswordProvider implements PasswordProvider {
|
|||
}
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return rules;
|
||||
}
|
||||
|
|
|
@ -2175,9 +2175,10 @@ public final class ZkController {
|
|||
*
|
||||
* @return true on success
|
||||
*/
|
||||
public static boolean persistConfigResourceToZooKeeper(ZkSolrResourceLoader zkLoader, int znodeVersion,
|
||||
public static int persistConfigResourceToZooKeeper(ZkSolrResourceLoader zkLoader, int znodeVersion,
|
||||
String resourceName, byte[] content,
|
||||
boolean createIfNotExists) {
|
||||
int latestVersion = znodeVersion;
|
||||
final ZkController zkController = zkLoader.getZkController();
|
||||
final SolrZkClient zkClient = zkController.getZkClient();
|
||||
final String resourceLocation = zkLoader.getConfigSetZkPath() + "/" + resourceName;
|
||||
|
@ -2185,17 +2186,19 @@ public final class ZkController {
|
|||
try {
|
||||
try {
|
||||
zkClient.setData(resourceLocation, content, znodeVersion, true);
|
||||
latestVersion = znodeVersion + 1;// if the set succeeded , it should have incremented the version by one always
|
||||
log.info("Persisted config data to node {} ", resourceLocation);
|
||||
touchConfDir(zkLoader);
|
||||
} catch (NoNodeException e) {
|
||||
if (createIfNotExists) {
|
||||
try {
|
||||
zkClient.create(resourceLocation, content, CreateMode.PERSISTENT, true);
|
||||
latestVersion = 0;//just created so version must be zero
|
||||
touchConfDir(zkLoader);
|
||||
} catch (KeeperException.NodeExistsException nee) {
|
||||
try {
|
||||
Stat stat = zkClient.exists(resourceLocation, null, true);
|
||||
log.info("failed to set data version in zk is {0} and expected version is {1} ", stat.getVersion(), znodeVersion);
|
||||
log.info("failed to set data version in zk is {} and expected version is {} ", stat.getVersion(), znodeVersion);
|
||||
} catch (Exception e1) {
|
||||
log.warn("could not get stat");
|
||||
}
|
||||
|
@ -2227,7 +2230,7 @@ public final class ZkController {
|
|||
log.error(msg, e);
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
|
||||
}
|
||||
return true;
|
||||
return latestVersion;
|
||||
}
|
||||
|
||||
public static void touchConfDir(ZkSolrResourceLoader zkLoader) {
|
||||
|
|
|
@ -187,14 +187,14 @@ public class ConfigOverlay implements MapSerializable {
|
|||
|
||||
public static final String RESOURCE_NAME = "configoverlay.json";
|
||||
|
||||
private static final Long STR_ATTR = 0L;
|
||||
/*private static final Long STR_ATTR = 0L;
|
||||
private static final Long STR_NODE = 1L;
|
||||
private static final Long BOOL_ATTR = 10L;
|
||||
private static final Long BOOL_NODE = 11L;
|
||||
private static final Long INT_ATTR = 20L;
|
||||
private static final Long INT_NODE = 21L;
|
||||
private static final Long FLOAT_ATTR = 30L;
|
||||
private static final Long FLOAT_NODE = 31L;
|
||||
private static final Long FLOAT_NODE = 31L;*/
|
||||
|
||||
private static Map editable_prop_map;
|
||||
//The path maps to the xml xpath and value of 1 means it is a tag with a string value and value
|
||||
|
|
|
@ -148,6 +148,7 @@ public class RequestParams implements MapSerializable {
|
|||
ZkSolrResourceLoader resourceLoader = (ZkSolrResourceLoader) loader;
|
||||
try {
|
||||
Stat stat = resourceLoader.getZkController().getZkClient().exists(resourceLoader.getConfigSetZkPath() + "/" + RequestParams.RESOURCE, null, true);
|
||||
log.debug("latest version of {} in ZK is : {}", resourceLoader.getConfigSetZkPath() + "/" + RequestParams.RESOURCE, stat == null ? "": stat.getVersion());
|
||||
if (stat == null) {
|
||||
requestParams = new RequestParams(Collections.EMPTY_MAP, -1);
|
||||
} else if (requestParams == null || stat.getVersion() > requestParams.getZnodeVersion()) {
|
||||
|
|
|
@ -77,6 +77,7 @@ import java.util.UUID;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static org.apache.solr.core.ConfigOverlay.ZNODEVER;
|
||||
import static org.apache.solr.core.SolrConfig.PluginOpts.LAZY;
|
||||
import static org.apache.solr.core.SolrConfig.PluginOpts.MULTI_OK;
|
||||
import static org.apache.solr.core.SolrConfig.PluginOpts.NOOP;
|
||||
|
@ -819,7 +820,7 @@ public class SolrConfig extends Config implements MapSerializable {
|
|||
@Override
|
||||
public Map<String, Object> toMap() {
|
||||
LinkedHashMap result = new LinkedHashMap();
|
||||
if (getZnodeVersion() > -1) result.put("znodeVersion", getZnodeVersion());
|
||||
if (getZnodeVersion() > -1) result.put(ZNODEVER, getZnodeVersion());
|
||||
result.put("luceneMatchVersion", luceneMatchVersion);
|
||||
result.put("updateHandler", getUpdateHandlerInfo().toMap());
|
||||
Map m = new LinkedHashMap();
|
||||
|
|
|
@ -66,6 +66,7 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.Lock;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
|
||||
import org.apache.solr.cloud.CloudDescriptor;
|
||||
|
@ -89,6 +90,7 @@ import org.apache.solr.handler.RequestHandlerBase;
|
|||
import org.apache.solr.handler.admin.ShowFileRequestHandler;
|
||||
import org.apache.solr.handler.component.HighlightComponent;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.logging.MDCUtils;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.response.BinaryResponseWriter;
|
||||
|
@ -132,7 +134,6 @@ import org.apache.solr.update.processor.RunUpdateProcessorFactory;
|
|||
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessorChain.ProcessorInfo;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
|
||||
import org.apache.solr.util.ConcurrentLRUCache;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.apache.solr.util.PropertiesInputStream;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
|
@ -691,6 +692,8 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
public SolrCore(String name, String dataDir, SolrConfig config, IndexSchema schema, CoreDescriptor cd, UpdateHandler updateHandler, IndexDeletionPolicyWrapper delPolicy, SolrCore prev) {
|
||||
coreDescriptor = cd;
|
||||
this.setName( name );
|
||||
MDCUtils.setCore(name); // show the core name in the error logs
|
||||
|
||||
resourceLoader = config.getResourceLoader();
|
||||
this.solrConfig = config;
|
||||
|
||||
|
@ -2077,13 +2080,13 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
HashMap<String, QueryResponseWriter> m= new HashMap<>();
|
||||
m.put("xml", new XMLResponseWriter());
|
||||
m.put("standard", m.get("xml"));
|
||||
m.put("json", new JSONResponseWriter());
|
||||
m.put(CommonParams.JSON, new JSONResponseWriter());
|
||||
m.put("python", new PythonResponseWriter());
|
||||
m.put("php", new PHPResponseWriter());
|
||||
m.put("phps", new PHPSerializedResponseWriter());
|
||||
m.put("ruby", new RubyResponseWriter());
|
||||
m.put("raw", new RawResponseWriter());
|
||||
m.put("javabin", new BinaryResponseWriter());
|
||||
m.put(CommonParams.JAVABIN, new BinaryResponseWriter());
|
||||
m.put("csv", new CSVResponseWriter());
|
||||
m.put("xsort", new SortingResponseWriter());
|
||||
m.put("schema.xml", new SchemaXmlResponseWriter());
|
||||
|
@ -2463,12 +2466,12 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
zkSolrResourceLoader.getZkController().registerConfListenerForCore(
|
||||
zkSolrResourceLoader.getConfigSetZkPath(),
|
||||
this,
|
||||
getListener(this, zkSolrResourceLoader));
|
||||
getConfListener(this, zkSolrResourceLoader));
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static Runnable getListener(SolrCore core, ZkSolrResourceLoader zkSolrResourceLoader) {
|
||||
public static Runnable getConfListener(SolrCore core, ZkSolrResourceLoader zkSolrResourceLoader) {
|
||||
final String coreName = core.getName();
|
||||
final CoreContainer cc = core.getCoreDescriptor().getCoreContainer();
|
||||
final String overlayPath = zkSolrResourceLoader.getConfigSetZkPath() + "/" + ConfigOverlay.RESOURCE_NAME;
|
||||
|
@ -2506,9 +2509,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
cc.reload(coreName);
|
||||
return;
|
||||
}
|
||||
//some files in conf directoy has changed other than schema.xml,
|
||||
// solrconfig.xml. so fire event listeners
|
||||
|
||||
//some files in the conf directory, other than managedschema, overlay and params, may have changed
|
||||
try (SolrCore core = cc.solrCores.getCoreFromAnyList(coreName, true)) {
|
||||
if (core == null || core.isClosed()) return;
|
||||
for (Runnable listener : core.confListeners) {
|
||||
|
|
|
@ -363,7 +363,9 @@ public class IndexFetcher {
|
|||
|
||||
try {
|
||||
|
||||
if (isIndexStale(indexDir)) {
|
||||
//We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
|
||||
//in the metadata. If there is a mismatch for the same index file then we download the entire index again.
|
||||
if (!isFullCopyNeeded && isIndexStale(indexDir)) {
|
||||
isFullCopyNeeded = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.solr.handler;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -29,35 +30,63 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrRequest;
|
||||
import org.apache.solr.client.solrj.SolrResponse;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.cloud.ZkCLI;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
import org.apache.solr.cloud.ZkSolrResourceLoader;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.core.ConfigOverlay;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.core.ImplicitPlugins;
|
||||
import org.apache.solr.core.RequestParams;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.handler.admin.CollectionsHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.response.BinaryResponseWriter;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.SchemaManager;
|
||||
import org.apache.solr.util.CommandOperation;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static java.util.Collections.singletonList;
|
||||
import static org.apache.solr.common.cloud.ZkNodeProps.makeMap;
|
||||
import static org.apache.solr.common.params.CoreAdminParams.NAME;
|
||||
import static org.apache.solr.common.util.StrUtils.formatString;
|
||||
import static org.apache.solr.core.ConfigOverlay.NOT_EDITABLE;
|
||||
import static org.apache.solr.core.ConfigOverlay.ZNODEVER;
|
||||
import static org.apache.solr.core.SolrConfig.PluginOpts.REQUIRE_CLASS;
|
||||
import static org.apache.solr.core.SolrConfig.PluginOpts.REQUIRE_NAME;
|
||||
import static org.apache.solr.core.SolrConfig.PluginOpts.REQUIRE_NAME_IN_OVERLAY;
|
||||
|
@ -67,6 +96,7 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
public static final Logger log = LoggerFactory.getLogger(SolrConfigHandler.class);
|
||||
public static final boolean configEditing_disabled = Boolean.getBoolean("disable.configEdit");
|
||||
private static final Map<String, SolrConfig.SolrPluginInfo> namedPlugins;
|
||||
private Lock reloadLock = new ReentrantLock(true);
|
||||
|
||||
static {
|
||||
Map<String, SolrConfig.SolrPluginInfo> map = new HashMap<>();
|
||||
|
@ -99,7 +129,7 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
}
|
||||
|
||||
|
||||
private static class Command {
|
||||
private class Command {
|
||||
private final SolrQueryRequest req;
|
||||
private final SolrQueryResponse resp;
|
||||
private final String method;
|
||||
|
@ -122,6 +152,7 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
|
||||
private void handleGET() {
|
||||
if (parts.size() == 1) {
|
||||
//this is the whole config. send out the whole payload
|
||||
resp.add("config", getConfigDetails());
|
||||
} else {
|
||||
if (ConfigOverlay.NAME.equals(parts.get(1))) {
|
||||
|
@ -131,9 +162,9 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
RequestParams params = req.getCore().getSolrConfig().getRequestParams();
|
||||
MapSolrParams p = params.getParams(parts.get(2));
|
||||
Map m = new LinkedHashMap<>();
|
||||
m.put(ConfigOverlay.ZNODEVER, params.getZnodeVersion());
|
||||
m.put(ZNODEVER, params.getZnodeVersion());
|
||||
if (p != null) {
|
||||
m.put(RequestParams.NAME, ZkNodeProps.makeMap(parts.get(2), p.getMap()));
|
||||
m.put(RequestParams.NAME, makeMap(parts.get(2), p.getMap()));
|
||||
}
|
||||
resp.add(SolrQueryResponse.NAME, m);
|
||||
} else {
|
||||
|
@ -141,8 +172,53 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
}
|
||||
|
||||
} else {
|
||||
Map<String, Object> m = getConfigDetails();
|
||||
resp.add("config", ZkNodeProps.makeMap(parts.get(1), m.get(parts.get(1))));
|
||||
if (ZNODEVER.equals(parts.get(1))) {
|
||||
resp.add(ZNODEVER, ZkNodeProps.makeMap(
|
||||
ConfigOverlay.NAME, req.getCore().getSolrConfig().getOverlay().getZnodeVersion(),
|
||||
RequestParams.NAME, req.getCore().getSolrConfig().getRequestParams().getZnodeVersion()));
|
||||
boolean checkStale = false;
|
||||
int expectedVersion = req.getParams().getInt(ConfigOverlay.NAME, -1);
|
||||
int actualVersion = req.getCore().getSolrConfig().getOverlay().getZnodeVersion();
|
||||
if (expectedVersion > actualVersion) {
|
||||
log.info("expecting overlay version {} but my version is {}", expectedVersion, actualVersion);
|
||||
checkStale = true;
|
||||
} else if (expectedVersion != -1) {
|
||||
log.info("I already have the expected version {} of config", expectedVersion);
|
||||
}
|
||||
expectedVersion = req.getParams().getInt(RequestParams.NAME, -1);
|
||||
actualVersion = req.getCore().getSolrConfig().getRequestParams().getZnodeVersion();
|
||||
if (expectedVersion > actualVersion) {
|
||||
log.info("expecting params version {} but my version is {}", expectedVersion, actualVersion);
|
||||
checkStale = true;
|
||||
} else if (expectedVersion != -1) {
|
||||
log.info("I already have the expected version {} of params", expectedVersion);
|
||||
}
|
||||
if (checkStale && req.getCore().getResourceLoader() instanceof ZkSolrResourceLoader) {
|
||||
new Thread(SolrConfigHandler.class.getSimpleName() + "-refreshconf") {
|
||||
@Override
|
||||
public void run() {
|
||||
if (!reloadLock.tryLock()) {
|
||||
log.info("Another reload is in progress . Not doing anything");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
log.info("Trying to update my configs");
|
||||
SolrCore.getConfListener(req.getCore(), (ZkSolrResourceLoader) req.getCore().getResourceLoader()).run();
|
||||
} catch (Exception e) {
|
||||
log.error("Unable to refresh conf ", e);
|
||||
} finally {
|
||||
reloadLock.unlock();
|
||||
}
|
||||
}
|
||||
}.start();
|
||||
} else {
|
||||
log.info("checkStale {} , resourceloader {}", checkStale, req.getCore().getResourceLoader().getClass().getName());
|
||||
}
|
||||
|
||||
} else {
|
||||
Map<String, Object> m = getConfigDetails();
|
||||
resp.add("config", makeMap(parts.get(1), m.get(parts.get(1))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -277,8 +353,15 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
if (ops.isEmpty()) {
|
||||
ZkController.touchConfDir(zkLoader);
|
||||
} else {
|
||||
ZkController.persistConfigResourceToZooKeeper(zkLoader, params.getZnodeVersion(),
|
||||
RequestParams.RESOURCE, params.toByteArray(), true);
|
||||
log.info("persisting params version : {}", params.toMap());
|
||||
int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader,
|
||||
params.getZnodeVersion(),
|
||||
RequestParams.RESOURCE,
|
||||
params.toByteArray(), true);
|
||||
waitForAllReplicasState(req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName(),
|
||||
req.getCore().getCoreDescriptor().getCoreContainer().getZkController(),
|
||||
RequestParams.NAME,
|
||||
latestVersion, 30);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -326,17 +409,20 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
}
|
||||
List errs = CommandOperation.captureErrors(ops);
|
||||
if (!errs.isEmpty()) {
|
||||
log.info("Failed to run commands errors are {}", StrUtils.join(errs, ','));
|
||||
log.info("Failed to run commands. errors are {}", StrUtils.join(errs, ','));
|
||||
resp.add(CommandOperation.ERR_MSGS, errs);
|
||||
return;
|
||||
}
|
||||
|
||||
SolrResourceLoader loader = req.getCore().getResourceLoader();
|
||||
if (loader instanceof ZkSolrResourceLoader) {
|
||||
ZkController.persistConfigResourceToZooKeeper((ZkSolrResourceLoader) loader, overlay.getZnodeVersion(),
|
||||
int latestVersion = ZkController.persistConfigResourceToZooKeeper((ZkSolrResourceLoader) loader, overlay.getZnodeVersion(),
|
||||
ConfigOverlay.RESOURCE_NAME, overlay.toByteArray(), true);
|
||||
|
||||
log.info("Executed config commands successfully and persited to ZK {}", ops);
|
||||
log.info("Executed config commands successfully and persisted to ZK {}", ops);
|
||||
waitForAllReplicasState(req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName(),
|
||||
req.getCore().getCoreDescriptor().getCoreContainer().getZkController(),
|
||||
ConfigOverlay.NAME,
|
||||
latestVersion, 30);
|
||||
} else {
|
||||
SolrResourceLoader.persistConfLocally(loader, ConfigOverlay.RESOURCE_NAME, overlay.toByteArray());
|
||||
req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName());
|
||||
|
@ -519,7 +605,7 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
|
||||
|
||||
private static Set<String> subPaths = new HashSet<>(Arrays.asList("/overlay", "/params",
|
||||
"/query", "/jmx", "/requestDispatcher"));
|
||||
"/query", "/jmx", "/requestDispatcher", "/znodeVersion"));
|
||||
|
||||
static {
|
||||
for (SolrConfig.SolrPluginInfo solrPluginInfo : SolrConfig.plugins)
|
||||
|
@ -556,4 +642,170 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
public static final String CREATE = "create";
|
||||
private static Set<String> cmdPrefixes = ImmutableSet.of(CREATE, UPDATE, "delete", "add");
|
||||
|
||||
/**
|
||||
* Block up to a specified maximum time until we see agreement on the schema
|
||||
* version in ZooKeeper across all replicas for a collection.
|
||||
*/
|
||||
private static void waitForAllReplicasState(String collection,
|
||||
ZkController zkController,
|
||||
String prop,
|
||||
int expectedVersion,
|
||||
int maxWaitSecs) {
|
||||
long startMs = System.currentTimeMillis();
|
||||
// get a list of active replica cores to query for the schema zk version (skipping this core of course)
|
||||
List<PerReplicaCallable> concurrentTasks = new ArrayList<>();
|
||||
|
||||
for (String coreUrl : getActiveReplicaCoreUrls(zkController, collection)) {
|
||||
PerReplicaCallable e = new PerReplicaCallable(coreUrl, prop, expectedVersion, maxWaitSecs);
|
||||
concurrentTasks.add(e);
|
||||
}
|
||||
if (concurrentTasks.isEmpty()) return; // nothing to wait for ...
|
||||
|
||||
log.info(formatString("Waiting up to {0} secs for {1} replicas to set the property {2} to be of version {3} for collection {4}",
|
||||
maxWaitSecs, concurrentTasks.size(), prop, expectedVersion, collection));
|
||||
|
||||
// use an executor service to invoke schema zk version requests in parallel with a max wait time
|
||||
int poolSize = Math.min(concurrentTasks.size(), 10);
|
||||
ExecutorService parallelExecutor =
|
||||
Executors.newFixedThreadPool(poolSize, new DefaultSolrThreadFactory("solrHandlerExecutor"));
|
||||
try {
|
||||
List<Future<Boolean>> results =
|
||||
parallelExecutor.invokeAll(concurrentTasks, maxWaitSecs, TimeUnit.SECONDS);
|
||||
|
||||
// determine whether all replicas have the update
|
||||
List<String> failedList = null; // lazily init'd
|
||||
for (int f = 0; f < results.size(); f++) {
|
||||
Boolean success = false;
|
||||
Future<Boolean> next = results.get(f);
|
||||
if (next.isDone() && !next.isCancelled()) {
|
||||
// looks to have finished, but need to check if it succeeded
|
||||
try {
|
||||
success = next.get();
|
||||
} catch (ExecutionException e) {
|
||||
// shouldn't happen since we checked isCancelled
|
||||
}
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
String coreUrl = concurrentTasks.get(f).coreUrl;
|
||||
log.warn("Core " + coreUrl + "could not get the expected version " + expectedVersion);
|
||||
if (failedList == null) failedList = new ArrayList<>();
|
||||
failedList.add(coreUrl);
|
||||
}
|
||||
}
|
||||
|
||||
// if any tasks haven't completed within the specified timeout, it's an error
|
||||
if (failedList != null)
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
formatString("{0} out of {1} the property {2} to be of version {3} within {4} seconds! Failed cores: {5}",
|
||||
failedList.size(), concurrentTasks.size() + 1, prop, expectedVersion, maxWaitSecs, failedList));
|
||||
|
||||
} catch (InterruptedException ie) {
|
||||
log.warn(formatString(
|
||||
"Core was interrupted . trying to set the property {1} to version {2} to propagate to {3} replicas for collection {4}",
|
||||
prop, expectedVersion, concurrentTasks.size(), collection));
|
||||
Thread.currentThread().interrupt();
|
||||
} finally {
|
||||
if (!parallelExecutor.isShutdown())
|
||||
parallelExecutor.shutdownNow();
|
||||
}
|
||||
|
||||
long diffMs = (System.currentTimeMillis() - startMs);
|
||||
log.info(formatString(
|
||||
"Took {0} secs to set the property {1} to be of version {2} for collection {3}",
|
||||
Math.round(diffMs / 1000d), prop, expectedVersion, collection));
|
||||
}
|
||||
|
||||
public static List<String> getActiveReplicaCoreUrls(ZkController zkController,
|
||||
String collection) {
|
||||
List<String> activeReplicaCoreUrls = new ArrayList<>();
|
||||
ClusterState clusterState = zkController.getZkStateReader().getClusterState();
|
||||
Set<String> liveNodes = clusterState.getLiveNodes();
|
||||
Collection<Slice> activeSlices = clusterState.getActiveSlices(collection);
|
||||
if (activeSlices != null && activeSlices.size() > 0) {
|
||||
for (Slice next : activeSlices) {
|
||||
Map<String, Replica> replicasMap = next.getReplicasMap();
|
||||
if (replicasMap != null) {
|
||||
for (Map.Entry<String, Replica> entry : replicasMap.entrySet()) {
|
||||
Replica replica = entry.getValue();
|
||||
if (ZkStateReader.ACTIVE.equals(replica.getStr(ZkStateReader.STATE_PROP)) &&
|
||||
liveNodes.contains(replica.getNodeName())) {
|
||||
activeReplicaCoreUrls.add(replica.getCoreUrl());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return activeReplicaCoreUrls;
|
||||
}
|
||||
|
||||
private static class PerReplicaCallable extends SolrRequest implements Callable<Boolean> {
|
||||
String coreUrl;
|
||||
String prop;
|
||||
int expectedZkVersion;
|
||||
Number remoteVersion = null;
|
||||
int maxWait;
|
||||
|
||||
PerReplicaCallable(String coreUrl, String prop, int expectedZkVersion, int maxWait) {
|
||||
super(METHOD.GET, "/config/" + ZNODEVER);
|
||||
this.coreUrl = coreUrl;
|
||||
this.expectedZkVersion = expectedZkVersion;
|
||||
this.prop = prop;
|
||||
this.maxWait = maxWait;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SolrParams getParams() {
|
||||
return new ModifiableSolrParams()
|
||||
.set(prop, expectedZkVersion)
|
||||
.set(CommonParams.WT, CommonParams.JAVABIN);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean call() throws Exception {
|
||||
long startTime = System.currentTimeMillis();
|
||||
int attempts = 0;
|
||||
try (HttpSolrClient solr = new HttpSolrClient(coreUrl)) {
|
||||
// eventually, this loop will get killed by the ExecutorService's timeout
|
||||
while (true) {
|
||||
try {
|
||||
long timeElapsed = (System.currentTimeMillis() - startTime) / 1000;
|
||||
if (timeElapsed >= maxWait) {
|
||||
return false;
|
||||
}
|
||||
log.info("Time elapsed : {} secs, maxWait {}", timeElapsed, maxWait);
|
||||
Thread.sleep(100);
|
||||
NamedList<Object> resp = solr.httpUriRequest(this).future.get();
|
||||
if (resp != null) {
|
||||
Map m = (Map) resp.get(ZNODEVER);
|
||||
if (m != null) {
|
||||
remoteVersion = (Number) m.get(prop);
|
||||
if (remoteVersion != null && remoteVersion.intValue() >= expectedZkVersion) break;
|
||||
}
|
||||
}
|
||||
|
||||
attempts++;
|
||||
log.info(formatString("Could not get expectedVersion {0} from {1} for prop {2} after {3} attempts", expectedZkVersion, coreUrl, prop, attempts));
|
||||
} catch (Exception e) {
|
||||
if (e instanceof InterruptedException) {
|
||||
break; // stop looping
|
||||
} else {
|
||||
log.warn("Failed to get /schema/zkversion from " + coreUrl + " due to: " + e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<ContentStream> getContentStreams() throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SolrResponse createResponse(SolrClient client) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -121,7 +121,7 @@ public class LoggingHandler extends RequestHandlerBase implements SolrCoreAware
|
|||
SimpleOrderedMap<Object> info = new SimpleOrderedMap<>();
|
||||
if(time>0) {
|
||||
info.add("since", time);
|
||||
info.add("found", found);
|
||||
info.add("found", found.get());
|
||||
}
|
||||
else {
|
||||
info.add("levels", watcher.getAllLevels()); // show for the first request
|
||||
|
|
|
@ -199,9 +199,7 @@ public class HttpShardHandler extends ShardHandler {
|
|||
params.remove(CommonParams.WT); // use default (currently javabin)
|
||||
params.remove(CommonParams.VERSION);
|
||||
|
||||
// SolrRequest req = new QueryRequest(SolrRequest.METHOD.POST, "/select");
|
||||
// use generic request to avoid extra processing of queries
|
||||
QueryRequest req = new QueryRequest(params);
|
||||
QueryRequest req = makeQueryRequest(sreq, params, shard);
|
||||
req.setMethod(SolrRequest.METHOD.POST);
|
||||
|
||||
// no need to set the response parser as binary is the default
|
||||
|
@ -239,13 +237,30 @@ public class HttpShardHandler extends ShardHandler {
|
|||
|
||||
ssr.elapsedTime = TimeUnit.MILLISECONDS.convert(System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
|
||||
|
||||
return srsp;
|
||||
return transfomResponse(sreq, srsp, shard);
|
||||
}
|
||||
};
|
||||
|
||||
pending.add( completionService.submit(task) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses could modify the request based on the shard
* <p>
* This base implementation ignores {@code sreq} and {@code shard} and simply wraps
* {@code params} in a new {@link QueryRequest}.
*
* @param sreq the shard request being dispatched (unused here)
* @param params the parameters to send to the shard
* @param shard the shard the request is sent to (unused here)
* @return a new {@link QueryRequest} built from {@code params}
|
||||
*/
|
||||
protected QueryRequest makeQueryRequest(final ShardRequest sreq, ModifiableSolrParams params, String shard)
|
||||
{
|
||||
// use generic request to avoid extra processing of queries
|
||||
return new QueryRequest(params);
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses could modify the Response based on the shard
* <p>
* This base implementation returns {@code rsp} unchanged and ignores the other arguments.
*
* @param sreq the shard request that produced the response (unused here)
* @param rsp the response received for the request
* @param shard the shard the response belongs to (unused here)
* @return the response to hand back to the caller; here, {@code rsp} itself
|
||||
*/
|
||||
protected ShardResponse transfomResponse(final ShardRequest sreq, ShardResponse rsp, String shard)
|
||||
{
|
||||
return rsp;
|
||||
}
|
||||
|
||||
/** returns a ShardResponse of the last response correlated with a ShardRequest. This won't
|
||||
* return early if it runs into an error.
|
||||
**/
|
||||
|
|
|
@ -16,19 +16,6 @@
|
|||
*/
|
||||
package org.apache.solr.logging.log4j;
|
||||
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
import org.apache.log4j.AppenderSkeleton;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.spi.LoggingEvent;
|
||||
import org.apache.log4j.spi.ThrowableInformation;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.logging.CircularList;
|
||||
import org.apache.solr.logging.ListenerConfig;
|
||||
import org.apache.solr.logging.LogWatcher;
|
||||
import org.apache.solr.logging.LoggerInfo;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
|
@ -37,6 +24,20 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.log4j.AppenderSkeleton;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.spi.LoggingEvent;
|
||||
import org.apache.log4j.spi.ThrowableInformation;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.logging.CircularList;
|
||||
import org.apache.solr.logging.ListenerConfig;
|
||||
import org.apache.solr.logging.LogWatcher;
|
||||
import org.apache.solr.logging.LoggerInfo;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
|
||||
public class Log4jWatcher extends LogWatcher<LoggingEvent> {
|
||||
|
||||
final String name;
|
||||
|
@ -157,6 +158,12 @@ public class Log4jWatcher extends LogWatcher<LoggingEvent> {
|
|||
if(t!=null) {
|
||||
doc.setField("trace", Throwables.getStackTraceAsString(t.getThrowable()));
|
||||
}
|
||||
|
||||
// Will be null if not present
|
||||
doc.setField("core", event.getMDC(ZkStateReader.CORE_NAME_PROP));
|
||||
doc.setField("collection", event.getMDC(ZkStateReader.COLLECTION_PROP));
|
||||
doc.setField("replica", event.getMDC(ZkStateReader.REPLICA_PROP));
|
||||
doc.setField("shard", event.getMDC(ZkStateReader.SHARD_ID_PROP));
|
||||
return doc;
|
||||
}
|
||||
}
|
|
@ -1494,7 +1494,7 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
try {
|
||||
queryFields = DisMaxQParser.parseQueryFields(req.getSchema(), solrParams); // req.getSearcher() here causes searcher refcount imbalance
|
||||
} catch (SyntaxError e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
// Phrase slop array
|
||||
int pslop[] = new int[4];
|
||||
|
|
|
@ -99,7 +99,7 @@ public class DocumentExpressionDictionaryFactory extends DictionaryFactory {
|
|||
try {
|
||||
expression = JavascriptCompiler.compile(weightExpression);
|
||||
} catch (ParseException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
SimpleBindings bindings = new SimpleBindings();
|
||||
for (SortField sortField : sortFields) {
|
||||
|
|
|
@ -55,7 +55,7 @@ public class FileDictionaryFactory extends DictionaryFactory {
|
|||
return new FileDictionary(new InputStreamReader(
|
||||
core.getResourceLoader().openResource(sourceLocation), StandardCharsets.UTF_8), fieldDelimiter);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -135,7 +135,7 @@ public class AnalyzingInfixLookupFactory extends LookupFactory {
|
|||
}
|
||||
};
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -518,7 +518,7 @@ public class SimplePostTool {
|
|||
Thread.sleep(delay * 1000);
|
||||
filesPosted++;
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return filesPosted;
|
||||
|
@ -610,7 +610,7 @@ public class SimplePostTool {
|
|||
} catch (IOException e) {
|
||||
warn("Caught exception when trying to open connection to "+u+": "+e.getMessage());
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
if(!subStack.isEmpty()) {
|
||||
|
@ -1209,7 +1209,7 @@ public class SimplePostTool {
|
|||
} catch (IOException e) {
|
||||
warn("IOException opening URL "+url+": "+e.getMessage());
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.apache.solr.core;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
|
|
@ -41,7 +41,6 @@ import static org.apache.solr.handler.TestSolrConfigHandlerCloud.compareValues;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
@LuceneTestCase.BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-6924")
|
||||
public class TestReqParamsAPI extends AbstractFullDistribZkTestBase {
|
||||
static final Logger log = LoggerFactory.getLogger(TestSolrConfigHandlerCloud.class);
|
||||
private List<RestTestHarness> restTestHarnesses = new ArrayList<>();
|
||||
|
|
|
@ -21,6 +21,8 @@ import org.noggit.JSONUtil;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
|
||||
|
||||
public class Replica extends ZkNodeProps {
|
||||
private final String name;
|
||||
|
@ -35,6 +37,9 @@ public class Replica extends ZkNodeProps {
|
|||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
public String getCoreUrl() {
|
||||
return ZkCoreNodeProps.getCoreUrl(getStr(BASE_URL_PROP), getStr(CORE_NAME_PROP));
|
||||
}
|
||||
|
||||
/** The name of the node this replica resides on */
|
||||
public String getNodeName() {
|
||||
|
|
|
@ -224,5 +224,9 @@ public interface CommonParams {
|
|||
* When querying a node, prefer local node's cores for distributed queries.
|
||||
*/
|
||||
public static final String PREFER_LOCAL_SHARDS = "preferLocalShards";
|
||||
|
||||
public static final String JAVABIN = "javabin";
|
||||
|
||||
public static final String JSON = "json";
|
||||
}
|
||||
|
||||
|
|
|
@ -361,6 +361,7 @@ var load_logging_viewer = function()
|
|||
content += '<tr class="' + classes.join( ' ' ) + '">' + "\n";
|
||||
content += '<td class="span"><a><span>' + format_time( doc.time ) + '</span></a></td>' + "\n";
|
||||
content += '<td class="level span"><a><span>' + doc.level.esc() + '</span></span></a></td>' + "\n";
|
||||
content += '<td class="span"><a><span>' + doc.core + '</span></a></td>' + "\n";
|
||||
content += '<td class="span"><a><span>' + doc.logger + '</span></a></td>' + "\n";
|
||||
content += '<td class="message span"><a><span>' + doc.message.replace( /,/g, ',​' ).esc() + '</span></a></td>' + "\n";
|
||||
content += '</tr>' + "\n";
|
||||
|
@ -433,6 +434,7 @@ sammy.get
|
|||
'<tr>' + "\n" +
|
||||
'<th class="time">Time (<span>Local</span>)</th>' + "\n" +
|
||||
'<th class="level">Level</th>' + "\n" +
|
||||
'<th class="core">Core</th>' + "\n" +
|
||||
'<th class="logger">Logger</th>' + "\n" +
|
||||
'<th class="message">Message</th>' + "\n" +
|
||||
'</tr>' + "\n" +
|
||||
|
|
Loading…
Reference in New Issue