mirror of https://github.com/apache/lucene.git

commit 363fc49258

    merge trunk up to r1671137

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene6271@1671151 13f79535-47bb-0310-9956-ffa450edef68
@@ -40,10 +40,15 @@ API Changes

New Features

-* LUCENE-6308: Span queries now share document conjunction/intersection
+* LUCENE-6308, LUCENE-6385, LUCENE-6391: Span queries now share
+  document conjunction/intersection
  code with boolean queries, and use two-phased iterators for
  faster intersection by avoiding loading positions in certain cases.
-  (Paul Elschot, Robert Muir via Mike McCandless)
+  (Paul Elschot, Terry Smith, Robert Muir via Mike McCandless)
+
+* LUCENE-6352: Added a new query time join to the join module that uses
+  global ordinals, which is faster for subsequent joins between reopens.
+  (Martijn van Groningen, Adrien Grand)

Optimizations

@@ -52,6 +57,9 @@ Optimizations
  faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
  Grand, Mike McCandless)

+* LUCENE-6388: Optimize SpanNearQuery when payloads are not present.
+  (Robert Muir)
+
Bug Fixes

* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.

@@ -123,6 +131,10 @@ Bug Fixes
  DocumentsWriterStallControl to prevent hangs during indexing if we
  miss a .notify/All somewhere (Mike McCandless)

+* LUCENE-6386: Correct IndexWriter.forceMerge documentation to state
+  that up to 3X (X = current index size) spare disk space may be needed
+  to complete forceMerge(1). (Robert Muir, Shai Erera, Mike McCandless)
+
Optimizations

* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
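The LUCENE-6352 entry above refers to the join module's JoinUtil. Below is a hedged sketch of how the new global-ordinals join could be driven; the field names and the exact createJoinQuery overload are assumptions based on this branch, not a verified API reference.

// Build an OrdinalMap over the join field once per reader (it can be reused
// across searches until the reader is reopened), then ask JoinUtil for a query.
SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
for (LeafReaderContext leaf : reader.leaves()) {
  values[leaf.ord] = DocValues.getSorted(leaf.reader(), "join_field"); // assumed field name
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
    reader.getCoreCacheKey(), values, PackedInts.DEFAULT);

IndexSearcher searcher = new IndexSearcher(reader);
Query joinQuery = JoinUtil.createJoinQuery("join_field",
    new TermQuery(new Term("type", "from")),  // fromQuery: selects the "from" side docs
    new TermQuery(new Term("type", "to")),    // toQuery: approximates the "to" side docs
    searcher, ScoreMode.Max, ordinalMap);     // overload assumed from this commit
TopDocs hits = searcher.search(joinQuery, 10);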
@@ -1547,14 +1547,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
   * longer be changed).</p>
   *
   * <p>Note that this requires free space that is proportional
-   * to the size of the index in your Directory (2X if you're
-   * using compound file format). For example, if your index
-   * size is 10 MB then you need an additional 10 MB free for
-   * this to complete (20 MB if you're using compound file
-   * format). This is also affected by the {@link Codec} that
-   * is used to execute the merge, and may result in even a
-   * bigger index. Also, it's best to call {@link #commit()}
-   * afterwards, to allow IndexWriter to free up disk space.</p>
+   * to the size of the index in your Directory: 2X if you are
+   * not using compound file format, and 3X if you are.
+   * For example, if your index size is 10 MB then you need
+   * an additional 20 MB free for this to complete (30 MB if
+   * you're using compound file format). This is also affected
+   * by the {@link Codec} that is used to execute the merge,
+   * and may result in even a bigger index. Also, it's best
+   * to call {@link #commit()} afterwards, to allow IndexWriter
+   * to free up disk space.</p>
   *
   * <p>If some but not all readers re-open while merging
   * is underway, this will cause {@code > 2X} temporary
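A small usage sketch of the pattern the corrected javadoc describes; the path and analyzer choice here are illustrative only.

try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
  // Needs up to 3X the current index size in spare disk space while the
  // merge runs (2X without compound file format).
  writer.forceMerge(1);
  // Committing afterwards lets IndexWriter delete the pre-merge segment files.
  writer.commit();
}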
@@ -232,8 +232,8 @@ public class PayloadNearQuery extends SpanNearQuery {
      scratch.bytes = thePayload;
      scratch.offset = 0;
      scratch.length = thePayload.length;
-      payloadScore = function.currentScore(doc, fieldName, start, end,
-          payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
+      payloadScore = function.currentScore(docID(), fieldName, start, end,
+          payloadsSeen, payloadScore, docScorer.computePayloadFactor(docID(),
          spans.startPosition(), spans.endPosition(), scratch));
      ++payloadsSeen;
    }

@@ -241,7 +241,7 @@ public class PayloadNearQuery extends SpanNearQuery {

    //
    @Override
-    protected boolean setFreqCurrentDoc() throws IOException {
+    protected void setFreqCurrentDoc() throws IOException {
      freq = 0.0f;
      payloadScore = 0;
      payloadsSeen = 0;

@@ -255,14 +255,12 @@ public class PayloadNearQuery extends SpanNearQuery {
        getPayloads(spansArr);
        startPos = spans.nextStartPosition();
      } while (startPos != Spans.NO_MORE_POSITIONS);
-      return true;
    }

    @Override
-    public float score() throws IOException {
-
-      return super.score()
-          * function.docScore(doc, fieldName, payloadsSeen, payloadScore);
+    public float scoreCurrentDoc() throws IOException {
+      return super.scoreCurrentDoc()
+          * function.docScore(docID(), fieldName, payloadsSeen, payloadScore);
    }
  }
@@ -99,7 +99,7 @@ public class PayloadTermQuery extends SpanTermQuery {
    }

    @Override
-    protected boolean setFreqCurrentDoc() throws IOException {
+    protected void setFreqCurrentDoc() throws IOException {
      freq = 0.0f;
      numMatches = 0;
      payloadScore = 0;

@@ -115,7 +115,6 @@ public class PayloadTermQuery extends SpanTermQuery {

        startPos = spans.nextStartPosition();
      } while (startPos != Spans.NO_MORE_POSITIONS);
-      return freq != 0;
    }

    protected void processPayload(Similarity similarity) throws IOException {

@@ -123,11 +122,11 @@ public class PayloadTermQuery extends SpanTermQuery {
        final PostingsEnum postings = termSpans.getPostings();
        payload = postings.getPayload();
        if (payload != null) {
-          payloadScore = function.currentScore(doc, term.field(),
+          payloadScore = function.currentScore(docID(), term.field(),
              spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
-              docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
+              docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload));
        } else {
-          payloadScore = function.currentScore(doc, term.field(),
+          payloadScore = function.currentScore(docID(), term.field(),
              spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
        }
        payloadsSeen++;

@@ -143,8 +142,7 @@ public class PayloadTermQuery extends SpanTermQuery {
     * @throws IOException if there is a low-level I/O error
     */
    @Override
-    public float score() throws IOException {
-
+    public float scoreCurrentDoc() throws IOException {
      return includeSpanScore ? getSpanScore() * getPayloadScore()
          : getPayloadScore();
    }

@@ -160,7 +158,7 @@ public class PayloadTermQuery extends SpanTermQuery {
     * @see #score()
     */
    protected float getSpanScore() throws IOException {
-      return super.score();
+      return super.scoreCurrentDoc();
    }

    /**

@@ -170,7 +168,7 @@ public class PayloadTermQuery extends SpanTermQuery {
     * {@link PayloadFunction#docScore(int, String, int, float)}
     */
    protected float getPayloadScore() {
-      return function.docScore(doc, term.field(), payloadsSeen, payloadScore);
+      return function.docScore(docID(), term.field(), payloadsSeen, payloadScore);
    }
  }
@@ -29,11 +29,11 @@ import java.util.Objects;
 * Common super class for un/ordered Spans
 */
abstract class NearSpans extends Spans {
-  SpanNearQuery query;
-  int allowedSlop;
+  final SpanNearQuery query;
+  final int allowedSlop;

-  List<Spans> subSpans; // in query order
-  DocIdSetIterator conjunction; // use to move to next doc with all clauses
+  final Spans[] subSpans; // in query order
+  final DocIdSetIterator conjunction; // use to move to next doc with all clauses
  boolean atFirstInCurrentDoc;
  boolean oneExhaustedInCurrentDoc; // no more results possbile in current doc

@@ -44,7 +44,7 @@ abstract class NearSpans extends Spans {
    if (subSpans.size() < 2) {
      throw new IllegalArgumentException("Less than 2 subSpans: " + query);
    }
-    this.subSpans = Objects.requireNonNull(subSpans); // in query order
+    this.subSpans = subSpans.toArray(new Spans[subSpans.size()]); // in query order
    this.conjunction = ConjunctionDISI.intersect(subSpans);
  }

@@ -91,13 +91,8 @@ abstract class NearSpans extends Spans {
    return res;
  }

-  private Spans[] subSpansArray = null; // init only when needed.
-
  public Spans[] getSubSpans() {
-    if (subSpansArray == null) {
-      subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
-    }
-    return subSpansArray;
+    return subSpans;
  }

}
@@ -18,12 +18,8 @@ package org.apache.lucene.search.spans;
 */

import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.Collection;
import java.util.Set;

/** A Spans that is formed from the ordered subspans of a SpanNearQuery
 * where the subspans do not overlap and have a maximum slop between them,

@@ -146,11 +142,11 @@ public class NearSpansOrdered extends NearSpans {
   * otherwise at least one is exhausted in the current doc.
   */
  private boolean stretchToOrder() throws IOException {
-    Spans prevSpans = subSpans.get(0);
+    Spans prevSpans = subSpans[0];
    assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
    assert prevSpans.endPosition() != NO_MORE_POSITIONS;
-    for (int i = 1; i < subSpans.size(); i++) {
-      Spans spans = subSpans.get(i);
+    for (int i = 1; i < subSpans.length; i++) {
+      Spans spans = subSpans[i];
      assert spans.startPosition() != NO_MORE_POSITIONS;
      assert spans.endPosition() != NO_MORE_POSITIONS;

@@ -169,15 +165,14 @@ public class NearSpansOrdered extends NearSpans {
   * on all subSpans, except the last one, in reverse order.
   */
  protected boolean shrinkToAfterShortestMatch() throws IOException {
-    Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
+    Spans lastSubSpans = subSpans[subSpans.length - 1];
    matchStart = lastSubSpans.startPosition();
    matchEnd = lastSubSpans.endPosition();

    int matchSlop = 0;
    int lastStart = matchStart;
    int lastEnd = matchEnd;
-    for (int i = subSpans.size() - 2; i >= 0; i--) {
-      Spans prevSpans = subSpans.get(i);
+    for (int i = subSpans.length - 2; i >= 0; i--) {
+      Spans prevSpans = subSpans[i];

      int prevStart = prevSpans.startPosition();
      int prevEnd = prevSpans.endPosition();

@@ -206,7 +201,6 @@ public class NearSpansOrdered extends NearSpans {
       */
      matchStart = prevStart;
-      lastStart = prevStart;
      lastEnd = prevEnd;
    }

    boolean match = matchSlop <= allowedSlop;

@@ -224,16 +218,14 @@ public class NearSpansOrdered extends NearSpans {
    return atFirstInCurrentDoc ? -1 : matchEnd;
  }

-  /** Throws an UnsupportedOperationException */
  @Override
  public Collection<byte[]> getPayload() throws IOException {
-    throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
+    return null;
  }

-  /** Throws an UnsupportedOperationException */
  @Override
  public boolean isPayloadAvailable() {
-    throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
+    return false;
  }

  @Override
@@ -47,7 +47,7 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
   * Also collect the payloads.
   */
  protected boolean shrinkToAfterShortestMatch() throws IOException {
-    Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
+    Spans lastSubSpans = subSpans[subSpans.length - 1];
    matchStart = lastSubSpans.startPosition();
    matchEnd = lastSubSpans.endPosition();

@@ -62,9 +62,8 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {

    int matchSlop = 0;
    int lastStart = matchStart;
    int lastEnd = matchEnd;
-    for (int i = subSpans.size() - 2; i >= 0; i--) {
-      Spans prevSpans = subSpans.get(i);
+    for (int i = subSpans.length - 2; i >= 0; i--) {
+      Spans prevSpans = subSpans[i];

      if (prevSpans.isPayloadAvailable()) {
        Collection<byte[]> payload = prevSpans.getPayload();

@@ -112,7 +111,6 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
       */
      matchStart = prevStart;
-      lastStart = prevStart;
      lastEnd = prevEnd;
    }

    boolean match = matchSlop <= allowedSlop;
@@ -18,19 +18,17 @@ package org.apache.lucene.search.spans;
 */

import java.io.IOException;

import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

@@ -132,9 +130,14 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
      }
    }

+    Terms terms = context.reader().terms(field);
+    if (terms == null) {
+      return null; // field does not exist
+    }
+
    // all NearSpans require at least two subSpans
    return (! inOrder) ? new NearSpansUnordered(this, subSpans)
-        : collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
+        : collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans)
        : new NearSpansOrdered(this, subSpans);
  }
@@ -146,7 +146,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
      startPos = in.nextStartPosition();
      assert startPos != NO_MORE_POSITIONS;
      for (;;) {
-        switch(acceptPosition(this)) {
+        switch(acceptPosition(in)) {
          case YES:
            atFirstInCurrentDoc = true;
            return in.docID();

@@ -180,7 +180,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
      if (startPos == NO_MORE_POSITIONS) {
        return NO_MORE_POSITIONS;
      }
-      switch(acceptPosition(this)) {
+      switch(acceptPosition(in)) {
        case YES:
          return startPos;
        case NO:
@@ -21,48 +21,58 @@ import java.io.IOException;
import java.util.Objects;

import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;

/**
 * Public for extension only.
 */
public class SpanScorer extends Scorer {
-  protected Spans spans;
-
-  protected int doc;
-  protected float freq;
-  protected int numMatches;
+  /** underlying spans we are scoring from */
+  protected final Spans spans;
+  /** similarity used in default score impl */
  protected final Similarity.SimScorer docScorer;

-  protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
-  throws IOException {
+  /** accumulated sloppy freq (computed in setFreqCurrentDoc) */
+  protected float freq;
+  /** number of matches (computed in setFreqCurrentDoc) */
+  protected int numMatches;
+
+  private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
+
+  protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
    super(weight);
    this.docScorer = Objects.requireNonNull(docScorer);
    this.spans = Objects.requireNonNull(spans);
-    this.doc = -1;
  }

  @Override
-  public int nextDoc() throws IOException {
-    int prevDoc = doc;
-    doc = spans.nextDoc();
-    if (doc != NO_MORE_DOCS) {
-      setFreqCurrentDoc();
-    }
-    return doc;
+  public final int nextDoc() throws IOException {
+    return spans.nextDoc();
  }

  @Override
-  public int advance(int target) throws IOException {
-    int prevDoc = doc;
-    doc = spans.advance(target);
-    if (doc != NO_MORE_DOCS) {
-      setFreqCurrentDoc();
-    }
-    return doc;
+  public final int advance(int target) throws IOException {
+    return spans.advance(target);
  }

-  protected boolean setFreqCurrentDoc() throws IOException {
+  /**
+   * Ensure setFreqCurrentDoc is called, if not already called for the current doc.
+   */
+  private final void ensureFreq() throws IOException {
+    int currentDoc = spans.docID();
+    if (lastScoredDoc != currentDoc) {
+      setFreqCurrentDoc();
+      lastScoredDoc = currentDoc;
+    }
+  }
+
+  /**
+   * Sets {@link #freq} and {@link #numMatches} for the current document.
+   * <p>
+   * This will be called at most once per document.
+   */
+  protected void setFreqCurrentDoc() throws IOException {
    freq = 0.0f;
    numMatches = 0;

@@ -90,34 +100,46 @@ public class SpanScorer extends Scorer {

    assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
    assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
  }

-    return true;
+  /**
+   * Score the current doc. The default implementation scores the doc
+   * with the similarity using the slop-adjusted {@link #freq}.
+   */
+  protected float scoreCurrentDoc() throws IOException {
+    return docScorer.score(spans.docID(), freq);
  }

  @Override
-  public int docID() { return doc; }
+  public final int docID() { return spans.docID(); }

  @Override
-  public float score() throws IOException {
-    float s = docScorer.score(doc, freq);
-    return s;
+  public final float score() throws IOException {
+    ensureFreq();
+    return scoreCurrentDoc();
  }

  @Override
-  public int freq() throws IOException {
+  public final int freq() throws IOException {
+    ensureFreq();
    return numMatches;
  }

  /** Returns the intermediate "sloppy freq" adjusted for edit distance
   *  @lucene.internal */
  // only public so .payloads can see it.
-  public float sloppyFreq() throws IOException {
+  public final float sloppyFreq() throws IOException {
+    ensureFreq();
    return freq;
  }

  @Override
-  public long cost() {
+  public final long cost() {
    return spans.cost();
  }
+
+  @Override
+  public final TwoPhaseIterator asTwoPhaseIterator() {
+    return spans.asTwoPhaseIterator();
+  }
}
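With score(), freq() and the iteration methods now final, customization moves to setFreqCurrentDoc()/scoreCurrentDoc(), which ensureFreq() invokes at most once per document. A minimal subclass sketch (the doubling boost is invented purely for illustration, not part of this commit):

public class BoostedSpanScorer extends SpanScorer {
  protected BoostedSpanScorer(Spans spans, SpanWeight weight,
                              Similarity.SimScorer docScorer) throws IOException {
    super(spans, weight, docScorer);
  }

  @Override
  protected float scoreCurrentDoc() throws IOException {
    // ensureFreq() has already populated freq/numMatches before this is called;
    // super.scoreCurrentDoc() returns docScorer.score(spans.docID(), freq).
    return 2f * super.scoreCurrentDoc();
  }
}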
@@ -199,8 +199,8 @@ public class TestIndexWriterForceMerge extends LuceneTestCase {
    assertTrue("forceMerge used too much temporary space: starting usage was "
        + startDiskUsage + " bytes; final usage was " + finalDiskUsage
        + " bytes; max temp usage was " + maxDiskUsage
-        + " but should have been " + (3 * maxStartFinalDiskUsage)
-        + " (= 3X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 3 * maxStartFinalDiskUsage);
+        + " but should have been at most " + (4 * maxStartFinalDiskUsage)
+        + " (= 4X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 4 * maxStartFinalDiskUsage);
    dir.close();
  }
@@ -162,7 +162,12 @@ final class JustCompileSearchSpans {
    }

    @Override
-    protected boolean setFreqCurrentDoc() {
+    protected void setFreqCurrentDoc() {
      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
    }
+
+    @Override
+    protected float scoreCurrentDoc() throws IOException {
+      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+    }
  }
@@ -21,6 +21,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearchEquivalenceTestBase;
import org.apache.lucene.search.TermQuery;

@@ -106,4 +107,122 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
    SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false);
    assertSubsetOf(q1, q2);
  }
+
+  /** SpanNearQuery([A B], N, false) ⊆ SpanNearQuery([A B], N+1, false) */
+  public void testSpanNearIncreasingSloppiness() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B C], N, false) ⊆ SpanNearQuery([A B C], N+1, false) */
+  public void testSpanNearIncreasingSloppiness3() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    Term t3 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B], N, true) ⊆ SpanNearQuery([A B], N+1, true) */
+  public void testSpanNearIncreasingOrderedSloppiness() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B C], N, true) ⊆ SpanNearQuery([A B C], N+1, true) */
+  public void testSpanNearIncreasingOrderedSloppiness3() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    Term t3 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery(A, N) ⊆ TermQuery(A) */
+  public void testSpanFirstTerm() throws Exception {
+    Term t1 = randomTerm();
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
+      Query q2 = new TermQuery(t1);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery(A, N) ⊆ SpanFirstQuery(A, N+1) */
+  public void testSpanFirstTermIncreasing() throws Exception {
+    Term t1 = randomTerm();
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
+      Query q2 = new SpanFirstQuery(new SpanTermQuery(t1), i+1);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery(A, ∞) = TermQuery(A) */
+  public void testSpanFirstTermEverything() throws Exception {
+    Term t1 = randomTerm();
+    Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), Integer.MAX_VALUE);
+    Query q2 = new TermQuery(t1);
+    assertSameSet(q1, q2);
+  }
+
+  /** SpanFirstQuery([A B], N) ⊆ SpanNearQuery([A B]) */
+  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
+  public void testSpanFirstNear() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(nearQuery, i);
+      Query q2 = nearQuery;
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery([A B], N) ⊆ SpanFirstQuery([A B], N+1) */
+  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
+  public void testSpanFirstNearIncreasing() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(nearQuery, i);
+      Query q2 = new SpanFirstQuery(nearQuery, i+1);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery([A B], ∞) = SpanNearQuery([A B]) */
+  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
+  public void testSpanFirstNearEverything() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    Query q1 = new SpanFirstQuery(nearQuery, Integer.MAX_VALUE);
+    Query q2 = nearQuery;
+    assertSameSet(q1, q2);
+  }
}
@@ -306,6 +306,9 @@ public class WeightedSpanTermExtractor {
    }
    Bits acceptDocs = context.reader().getLiveDocs();
    final Spans spans = q.getSpans(context, acceptDocs, termContexts);
+    if (spans == null) {
+      return;
+    }

    // collect span positions
    while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
@@ -0,0 +1,82 @@
package org.apache.lucene.search.highlight;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.LuceneTestCase;

public class MissesTest extends LuceneTestCase {
  public void testTermQuery() throws IOException, InvalidTokenOffsetsException {
    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
      final Query query = new TermQuery(new Term("test", "foo"));
      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
      assertEquals("this is a <B>foo</B> bar example",
          highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
      assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
    }
  }

  public void testBooleanQuery() throws IOException, InvalidTokenOffsetsException {
    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
      final BooleanQuery query = new BooleanQuery();
      query.add(new TermQuery(new Term("test", "foo")), Occur.MUST);
      query.add(new TermQuery(new Term("test", "bar")), Occur.MUST);
      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
      assertEquals("this is a <B>foo</B> <B>bar</B> example",
          highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
      assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
    }
  }

  public void testPhraseQuery() throws IOException, InvalidTokenOffsetsException {
    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
      final PhraseQuery query = new PhraseQuery();
      query.add(new Term("test", "foo"));
      query.add(new Term("test", "bar"));
      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
      assertEquals("this is a <B>foo</B> <B>bar</B> example",
          highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
      assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
    }
  }

  public void testSpanNearQuery() throws IOException, InvalidTokenOffsetsException {
    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
      final Query query = new SpanNearQuery(new SpanQuery[] {
          new SpanTermQuery(new Term("test", "foo")),
          new SpanTermQuery(new Term("test", "bar"))}, 0, true);
      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
      assertEquals("this is a <B>foo</B> <B>bar</B> example",
          highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
      assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
    }
  }
}
@@ -116,9 +116,9 @@ org.apache.hadoop.version = 2.6.0

# The httpcore version is often different from the httpclient and httpmime versions,
# so the httpcore version value should not share the same symbolic name with them.
-/org.apache.httpcomponents/httpclient = 4.3.1
-/org.apache.httpcomponents/httpcore = 4.3
-/org.apache.httpcomponents/httpmime = 4.3.1
+/org.apache.httpcomponents/httpclient = 4.4.1
+/org.apache.httpcomponents/httpcore = 4.4.1
+/org.apache.httpcomponents/httpmime = 4.4.1

/org.apache.ivy/ivy = 2.3.0
@@ -0,0 +1,97 @@
package org.apache.lucene.search.join;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.LongBitSet;

import java.io.IOException;

abstract class BaseGlobalOrdinalScorer extends Scorer {

  final LongBitSet foundOrds;
  final SortedDocValues values;
  final Scorer approximationScorer;

  float score;

  public BaseGlobalOrdinalScorer(Weight weight, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
    super(weight);
    this.foundOrds = foundOrds;
    this.values = values;
    this.approximationScorer = approximationScorer;
  }

  @Override
  public float score() throws IOException {
    return score;
  }

  @Override
  public int docID() {
    return approximationScorer.docID();
  }

  @Override
  public int nextDoc() throws IOException {
    return advance(approximationScorer.docID() + 1);
  }

  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    final DocIdSetIterator approximation = new DocIdSetIterator() {
      @Override
      public int docID() {
        return approximationScorer.docID();
      }

      @Override
      public int nextDoc() throws IOException {
        return approximationScorer.nextDoc();
      }

      @Override
      public int advance(int target) throws IOException {
        return approximationScorer.advance(target);
      }

      @Override
      public long cost() {
        return approximationScorer.cost();
      }
    };
    return createTwoPhaseIterator(approximation);
  }

  @Override
  public long cost() {
    return approximationScorer.cost();
  }

  @Override
  public int freq() throws IOException {
    return 1;
  }

  protected abstract TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation);

}
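For context, this is roughly how a caller consumes the two-phase view such a scorer exposes: iterate the cheap approximation, then confirm each candidate with matches() before treating it as a hit. A sketch only; the TwoPhaseIterator accessor name is assumed from this branch's API.

TwoPhaseIterator twoPhase = scorer.asTwoPhaseIterator();
DocIdSetIterator approximation = twoPhase.approximation(); // assumed accessor
for (int doc = approximation.nextDoc();
     doc != DocIdSetIterator.NO_MORE_DOCS;
     doc = approximation.nextDoc()) {
  if (twoPhase.matches()) {
    // verified match: here, the doc's join value's ordinal is in foundOrds
  }
}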
@@ -0,0 +1,114 @@
package org.apache.lucene.search.join;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;

import java.io.IOException;

/**
 * A collector that collects all ordinals from a specified field matching the query.
 *
 * @lucene.experimental
 */
final class GlobalOrdinalsCollector implements Collector {

  final String field;
  final LongBitSet collectedOrds;
  final MultiDocValues.OrdinalMap ordinalMap;

  GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
    this.field = field;
    this.ordinalMap = ordinalMap;
    this.collectedOrds = new LongBitSet(valueCount);
  }

  public LongBitSet getCollectorOrdinals() {
    return collectedOrds;
  }

  @Override
  public boolean needsScores() {
    return false;
  }

  @Override
  public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
    SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
    if (ordinalMap != null) {
      LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
      return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
    } else {
      return new SegmentOrdinalCollector(docTermOrds);
    }
  }

  final class OrdinalMapCollector implements LeafCollector {

    private final SortedDocValues docTermOrds;
    private final LongValues segmentOrdToGlobalOrdLookup;

    OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
      this.docTermOrds = docTermOrds;
      this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
    }

    @Override
    public void collect(int doc) throws IOException {
      final long segmentOrd = docTermOrds.getOrd(doc);
      if (segmentOrd != -1) {
        final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
        collectedOrds.set(globalOrd);
      }
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
    }
  }

  final class SegmentOrdinalCollector implements LeafCollector {

    private final SortedDocValues docTermOrds;

    SegmentOrdinalCollector(SortedDocValues docTermOrds) {
      this.docTermOrds = docTermOrds;
    }

    @Override
    public void collect(int doc) throws IOException {
      final long segmentOrd = docTermOrds.getOrd(doc);
      if (segmentOrd != -1) {
        collectedOrds.set(segmentOrd);
      }
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
    }
  }

}
@@ -0,0 +1,245 @@
package org.apache.lucene.search.join;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;

import java.io.IOException;
import java.util.Set;

final class GlobalOrdinalsQuery extends Query {

  // All the ords of matching docs found with OrdinalsCollector.
  private final LongBitSet foundOrds;
  private final String joinField;
  private final MultiDocValues.OrdinalMap globalOrds;
  // Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
  private final Query toQuery;

  // just for hashcode and equals:
  private final Query fromQuery;
  private final IndexReader indexReader;

  GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
    this.foundOrds = foundOrds;
    this.joinField = joinField;
    this.globalOrds = globalOrds;
    this.toQuery = toQuery;
    this.fromQuery = fromQuery;
    this.indexReader = indexReader;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new W(this, toQuery.createWeight(searcher, false));
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    fromQuery.extractTerms(terms);
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    if (!super.equals(o)) return false;

    GlobalOrdinalsQuery that = (GlobalOrdinalsQuery) o;

    if (!fromQuery.equals(that.fromQuery)) return false;
    if (!joinField.equals(that.joinField)) return false;
    if (!toQuery.equals(that.toQuery)) return false;
    if (!indexReader.equals(that.indexReader)) return false;

    return true;
  }

  @Override
  public int hashCode() {
    int result = super.hashCode();
    result = 31 * result + joinField.hashCode();
    result = 31 * result + toQuery.hashCode();
    result = 31 * result + fromQuery.hashCode();
    result = 31 * result + indexReader.hashCode();
    return result;
  }

  @Override
  public String toString(String field) {
    return "GlobalOrdinalsQuery{" +
        "joinField=" + joinField +
        '}';
  }

  final class W extends Weight {

    private final Weight approximationWeight;

    private float queryNorm;
    private float queryWeight;

    W(Query query, Weight approximationWeight) {
      super(query);
      this.approximationWeight = approximationWeight;
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
      if (values != null) {
        int segmentOrd = values.getOrd(doc);
        if (segmentOrd != -1) {
          BytesRef joinValue = values.lookupOrd(segmentOrd);
          return new ComplexExplanation(true, queryNorm, "Score based on join value " + joinValue.utf8ToString());
        }
      }
      return new ComplexExplanation(false, 0.0f, "Not a match");
    }

    @Override
    public float getValueForNormalization() throws IOException {
      queryWeight = getBoost();
      return queryWeight * queryWeight;
    }

    @Override
    public void normalize(float norm, float topLevelBoost) {
      this.queryNorm = norm * topLevelBoost;
      queryWeight *= this.queryNorm;
    }

    @Override
    public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
      SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
      if (values == null) {
        return null;
      }

      Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
      if (approximationScorer == null) {
        return null;
      }
      if (globalOrds != null) {
        return new OrdinalMapScorer(this, queryNorm, foundOrds, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
      } else {
        return new SegmentOrdinalScorer(this, queryNorm, foundOrds, values, approximationScorer);
      }
    }

  }

  final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {

    final LongValues segmentOrdToGlobalOrdLookup;

    public OrdinalMapScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
      super(weight, foundOrds, values, approximationScorer);
      this.score = score;
      this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
    }

    @Override
    public int advance(int target) throws IOException {
      for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
        final long segmentOrd = values.getOrd(docID);
        if (segmentOrd != -1) {
          final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
          if (foundOrds.get(globalOrd)) {
            return docID;
          }
        }
      }
      return NO_MORE_DOCS;
    }

    @Override
    protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
      return new TwoPhaseIterator(approximation) {

        @Override
        public boolean matches() throws IOException {
          final long segmentOrd = values.getOrd(approximationScorer.docID());
          if (segmentOrd != -1) {
            final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
            if (foundOrds.get(globalOrd)) {
              return true;
            }
          }
          return false;
        }
      };
    }
  }

  final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {

    public SegmentOrdinalScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
      super(weight, foundOrds, values, approximationScorer);
      this.score = score;
    }

    @Override
    public int advance(int target) throws IOException {
      for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
        final long segmentOrd = values.getOrd(docID);
        if (segmentOrd != -1) {
          if (foundOrds.get(segmentOrd)) {
            return docID;
          }
        }
      }
      return NO_MORE_DOCS;
    }

    @Override
    protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
      return new TwoPhaseIterator(approximation) {

        @Override
        public boolean matches() throws IOException {
          final long segmentOrd = values.getOrd(approximationScorer.docID());
          if (segmentOrd != -1) {
            if (foundOrds.get(segmentOrd)) {
              return true;
            }
          }
          return false;
        }
      };
    }

  }
}
@ -0,0 +1,250 @@
|
|||
package org.apache.lucene.search.join;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
abstract class GlobalOrdinalsWithScoreCollector implements Collector {
|
||||
|
||||
final String field;
|
||||
final MultiDocValues.OrdinalMap ordinalMap;
|
||||
final LongBitSet collectedOrds;
|
||||
protected final Scores scores;
|
||||
|
||||
GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
|
||||
if (valueCount > Integer.MAX_VALUE) {
|
||||
// We simply don't support more than
|
||||
throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
|
||||
}
|
||||
this.field = field;
|
||||
this.ordinalMap = ordinalMap;
|
||||
this.collectedOrds = new LongBitSet(valueCount);
|
||||
this.scores = new Scores(valueCount);
|
||||
}
|
||||
|
||||
public LongBitSet getCollectorOrdinals() {
|
||||
return collectedOrds;
|
||||
}
|
||||
|
||||
public float score(int globalOrdinal) {
|
||||
return scores.getScore(globalOrdinal);
|
||||
}
|
||||
|
||||
protected abstract void doScore(int globalOrd, float existingScore, float newScore);
|
||||
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
|
||||
if (ordinalMap != null) {
|
||||
LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
|
||||
return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
|
||||
} else {
|
||||
return new SegmentOrdinalCollector(docTermOrds);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return true;
|
||||
}
|
||||
|
||||
final class OrdinalMapCollector implements LeafCollector {
|
||||
|
||||
private final SortedDocValues docTermOrds;
|
||||
private final LongValues segmentOrdToGlobalOrdLookup;
|
||||
private Scorer scorer;
|
||||
|
||||
OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
|
||||
this.docTermOrds = docTermOrds;
|
||||
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
final long segmentOrd = docTermOrds.getOrd(doc);
|
||||
if (segmentOrd != -1) {
|
||||
final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
||||
collectedOrds.set(globalOrd);
|
||||
float existingScore = scores.getScore(globalOrd);
|
||||
float newScore = scorer.score();
|
||||
doScore(globalOrd, existingScore, newScore);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
this.scorer = scorer;
|
||||
}
|
||||
}
|
||||
|
||||
final class SegmentOrdinalCollector implements LeafCollector {
|
||||
|
||||
private final SortedDocValues docTermOrds;
|
||||
private Scorer scorer;
|
||||
|
||||
SegmentOrdinalCollector(SortedDocValues docTermOrds) {
|
||||
this.docTermOrds = docTermOrds;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
final int segmentOrd = docTermOrds.getOrd(doc);
|
||||
if (segmentOrd != -1) {
|
||||
collectedOrds.set(segmentOrd);
|
||||
float existingScore = scores.getScore(segmentOrd);
|
||||
float newScore = scorer.score();
|
||||
doScore(segmentOrd, existingScore, newScore);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
      this.scorer = scorer;
    }
  }

  static final class Max extends GlobalOrdinalsWithScoreCollector {

    public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
      super(field, ordinalMap, valueCount);
    }

    @Override
    protected void doScore(int globalOrd, float existingScore, float newScore) {
      scores.setScore(globalOrd, Math.max(existingScore, newScore));
    }

  }

  static final class Sum extends GlobalOrdinalsWithScoreCollector {

    public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
      super(field, ordinalMap, valueCount);
    }

    @Override
    protected void doScore(int globalOrd, float existingScore, float newScore) {
      scores.setScore(globalOrd, existingScore + newScore);
    }

  }

  static final class Avg extends GlobalOrdinalsWithScoreCollector {

    private final Occurrences occurrences;

    public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
      super(field, ordinalMap, valueCount);
      this.occurrences = new Occurrences(valueCount);
    }

    @Override
    protected void doScore(int globalOrd, float existingScore, float newScore) {
      occurrences.increment(globalOrd);
      scores.setScore(globalOrd, existingScore + newScore);
    }

    @Override
    public float score(int globalOrdinal) {
      return scores.getScore(globalOrdinal) / occurrences.getOccurence(globalOrdinal);
    }
  }

  // Because the global ordinal is directly used as a key to a score, we should be somewhat smart about allocating
  // the scores array. Most of the time not all docs match, so splitting the scores array up into blocks can prevent
  // the creation of huge arrays. Also, working with smaller arrays is supposed to be more gc friendly.
  //
  // At first a hash map implementation would make sense, but in the case that more than half of the docs match,
  // this becomes more expensive than just using an array.

  // Maybe this should become a method parameter?
  static final int arraySize = 4096;

  static final class Scores {

    final float[][] blocks;

    private Scores(long valueCount) {
      long blockSize = valueCount + arraySize - 1;
      blocks = new float[(int) (blockSize / arraySize)][];
    }

    public void setScore(int globalOrdinal, float score) {
      int block = globalOrdinal / arraySize;
      int offset = globalOrdinal % arraySize;
      float[] scores = blocks[block];
      if (scores == null) {
        blocks[block] = scores = new float[arraySize];
      }
      scores[offset] = score;
    }

    public float getScore(int globalOrdinal) {
      int block = globalOrdinal / arraySize;
      int offset = globalOrdinal % arraySize;
      float[] scores = blocks[block];
      float score;
      if (scores != null) {
        score = scores[offset];
      } else {
        score = 0f;
      }
      return score;
    }

  }

  static final class Occurrences {

    final int[][] blocks;

    private Occurrences(long valueCount) {
      long blockSize = valueCount + arraySize - 1;
      blocks = new int[(int) (blockSize / arraySize)][];
    }

    public void increment(int globalOrdinal) {
      int block = globalOrdinal / arraySize;
      int offset = globalOrdinal % arraySize;
      int[] occurrences = blocks[block];
      if (occurrences == null) {
        blocks[block] = occurrences = new int[arraySize];
      }
      occurrences[offset]++;
    }

    public int getOccurence(int globalOrdinal) {
      int block = globalOrdinal / arraySize;
      int offset = globalOrdinal % arraySize;
      int[] occurrences = blocks[block];
      return occurrences[offset];
    }

  }

}
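A quick worked sketch of the block/offset arithmetic shared by Scores and Occurrences above, using hypothetical numbers that are not part of the patch: only the blocks that actually receive a matching global ordinal are ever allocated.

    // assuming arraySize = 4096 as in the patch; ordinal 10000 is made up
    int globalOrdinal = 10000;
    int block = globalOrdinal / 4096;   // = 2, the third block
    int offset = globalOrdinal % 4096;  // = 1808, the slot inside that block
    // setScore(10000, s) lazily allocates blocks[2] only; blocks[0] and
    // blocks[1] stay null until an ordinal below 8192 gets a score
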
@@ -0,0 +1,256 @@
package org.apache.lucene.search.join;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;

import java.io.IOException;
import java.util.Set;

final class GlobalOrdinalsWithScoreQuery extends Query {

  private final GlobalOrdinalsWithScoreCollector collector;
  private final String joinField;
  private final MultiDocValues.OrdinalMap globalOrds;
  // Is also an approximation of the docs that will match. Can be all docs that have the toField, or something more specific.
  private final Query toQuery;

  // just for hashcode and equals:
  private final Query fromQuery;
  private final IndexReader indexReader;

  GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
    this.collector = collector;
    this.joinField = joinField;
    this.globalOrds = globalOrds;
    this.toQuery = toQuery;
    this.fromQuery = fromQuery;
    this.indexReader = indexReader;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new W(this, toQuery.createWeight(searcher, false));
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    fromQuery.extractTerms(terms);
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    if (!super.equals(o)) return false;

    GlobalOrdinalsWithScoreQuery that = (GlobalOrdinalsWithScoreQuery) o;

    if (!fromQuery.equals(that.fromQuery)) return false;
    if (!joinField.equals(that.joinField)) return false;
    if (!toQuery.equals(that.toQuery)) return false;
    if (!indexReader.equals(that.indexReader)) return false;

    return true;
  }

  @Override
  public int hashCode() {
    int result = super.hashCode();
    result = 31 * result + joinField.hashCode();
    result = 31 * result + toQuery.hashCode();
    result = 31 * result + fromQuery.hashCode();
    result = 31 * result + indexReader.hashCode();
    return result;
  }

  @Override
  public String toString(String field) {
    return "GlobalOrdinalsQuery{" +
        "joinField=" + joinField +
        '}';
  }

  final class W extends Weight {

    private final Weight approximationWeight;

    private float queryNorm;
    private float queryWeight;

    W(Query query, Weight approximationWeight) {
      super(query);
      this.approximationWeight = approximationWeight;
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
      if (values != null) {
        int segmentOrd = values.getOrd(doc);
        if (segmentOrd != -1) {
          final float score;
          if (globalOrds != null) {
            long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
            score = collector.scores.getScore((int) globalOrd);
          } else {
            score = collector.score(segmentOrd);
          }
          BytesRef joinValue = values.lookupOrd(segmentOrd);
          return new ComplexExplanation(true, score, "Score based on join value " + joinValue.utf8ToString());
        }
      }
      return new ComplexExplanation(false, 0.0f, "Not a match");
    }

    @Override
    public float getValueForNormalization() throws IOException {
      queryWeight = getBoost();
      return queryWeight * queryWeight;
    }

    @Override
    public void normalize(float norm, float topLevelBoost) {
      this.queryNorm = norm * topLevelBoost;
      queryWeight *= this.queryNorm;
    }

    @Override
    public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
      SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
      if (values == null) {
        return null;
      }

      Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
      if (approximationScorer == null) {
        return null;
      } else if (globalOrds != null) {
        return new OrdinalMapScorer(this, collector, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
      } else {
        return new SegmentOrdinalScorer(this, collector, values, approximationScorer);
      }
    }

  }

  final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {

    final LongValues segmentOrdToGlobalOrdLookup;
    final GlobalOrdinalsWithScoreCollector collector;

    public OrdinalMapScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
      super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
      this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
      this.collector = collector;
    }

    @Override
    public int advance(int target) throws IOException {
      for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
        final long segmentOrd = values.getOrd(docID);
        if (segmentOrd != -1) {
          final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
          if (foundOrds.get(globalOrd)) {
            score = collector.score((int) globalOrd);
            return docID;
          }
        }
      }
      return NO_MORE_DOCS;
    }

    @Override
    protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
      return new TwoPhaseIterator(approximation) {

        @Override
        public boolean matches() throws IOException {
          final long segmentOrd = values.getOrd(approximationScorer.docID());
          if (segmentOrd != -1) {
            final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
            if (foundOrds.get(globalOrd)) {
              score = collector.score((int) globalOrd);
              return true;
            }
          }
          return false;
        }

      };
    }
  }

  final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {

    final GlobalOrdinalsWithScoreCollector collector;

    public SegmentOrdinalScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer) {
      super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
      this.collector = collector;
    }

    @Override
    public int advance(int target) throws IOException {
      for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
        final int segmentOrd = values.getOrd(docID);
        if (segmentOrd != -1) {
          if (foundOrds.get(segmentOrd)) {
            score = collector.score(segmentOrd);
            return docID;
          }
        }
      }
      return NO_MORE_DOCS;
    }

    @Override
    protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
      return new TwoPhaseIterator(approximation) {

        @Override
        public boolean matches() throws IOException {
          final int segmentOrd = values.getOrd(approximationScorer.docID());
          if (segmentOrd != -1) {
            if (foundOrds.get(segmentOrd)) {
              score = collector.score(segmentOrd);
              return true;
            }
          }
          return false;
        }
      };
    }
  }
}
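The two scorer subclasses above share one two-phase pattern: the toQuery scorer is the cheap approximation that drives iteration, and matches() only confirms a candidate whose join ordinal was collected on the "from" side, setting the score as a side effect. A minimal sketch of how a consumer could drive such an iterator; the method and variable names are illustrative, and it assumes the TwoPhaseIterator came from one of the scorers above and exposes its approximation via approximation(), as TwoPhaseIterator does on this branch.

    // hedged sketch, not part of the patch: visit all confirmed hits
    static void drain(TwoPhaseIterator twoPhase) throws IOException {
      DocIdSetIterator approximation = twoPhase.approximation();
      for (int doc = approximation.nextDoc();
           doc != DocIdSetIterator.NO_MORE_DOCS;
           doc = approximation.nextDoc()) {
        if (twoPhase.matches()) {   // per-doc ordinal check, the expensive phase
          // doc is a confirmed join hit; matches() already looked up its score
        }
      }
    }
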
@@ -17,7 +17,12 @@ package org.apache.lucene.search.join;
 * limitations under the License.
 */

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;

import java.io.IOException;

@@ -90,4 +95,78 @@ public final class JoinUtil {
    }
  }

  /**
   * A query time join using global ordinals over a dedicated join field.
   *
   * This join has certain restrictions and requirements:
   * 1) A document can only refer to one other document (but can be referred to by one or more documents).
   * 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
   *    that identifies the "from" and "to" side, and then the fromQuery and toQuery must take this into account.
   * 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
   *    should store the join values as UTF-8 strings.
   * 4) An ordinal map must be provided that is created on top of the join field.
   *
   * @param joinField  The {@link org.apache.lucene.index.SortedDocValues} field containing the join values
   * @param fromQuery  The query containing the actual user query. Also, the fromQuery can only match "from" documents.
   * @param toQuery    The query identifying all documents on the "to" side.
   * @param searcher   The index searcher used to execute the from query
   * @param scoreMode  Instructs how scores from the fromQuery are mapped to the returned query
   * @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
   *                   needs to be provided.
   * @return a {@link Query} instance that can be used to join documents based on the join field
   * @throws IOException If I/O related errors occur
   */
  public static Query createJoinQuery(String joinField,
                                      Query fromQuery,
                                      Query toQuery,
                                      IndexSearcher searcher,
                                      ScoreMode scoreMode,
                                      MultiDocValues.OrdinalMap ordinalMap) throws IOException {
    IndexReader indexReader = searcher.getIndexReader();
    int numSegments = indexReader.leaves().size();
    final long valueCount;
    if (numSegments == 0) {
      return new MatchNoDocsQuery();
    } else if (numSegments == 1) {
      // No need to use the ordinal map, because there is just one segment.
      ordinalMap = null;
      LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader();
      SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField);
      if (joinSortedDocValues != null) {
        valueCount = joinSortedDocValues.getValueCount();
      } else {
        return new MatchNoDocsQuery();
      }
    } else {
      if (ordinalMap == null) {
        throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
      }
      valueCount = ordinalMap.getValueCount();
    }

    Query rewrittenFromQuery = searcher.rewrite(fromQuery);
    if (scoreMode == ScoreMode.None) {
      GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
      searcher.search(fromQuery, globalOrdinalsCollector);
      return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
    }

    GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector;
    switch (scoreMode) {
      case Total:
        globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount);
        break;
      case Max:
        globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount);
        break;
      case Avg:
        globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount);
        break;
      default:
        throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
    }
    searcher.search(fromQuery, globalOrdinalsWithScoreCollector);
    return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
  }

}
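For reviewers, a condensed sketch of how the new API is driven, distilled from testSimpleOrdinalsJoin further down; the field names are the test's own, and an open reader and searcher are assumed.

    // one OrdinalMap over the join field, spanning all segments
    SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
    for (int i = 0; i < values.length; i++) {
      values[i] = DocValues.getSorted(reader.leaves().get(i).reader(), joinField);
    }
    MultiDocValues.OrdinalMap ordinalMap =
        MultiDocValues.OrdinalMap.build(reader.getCoreCacheKey(), values, PackedInts.DEFAULT);

    // match "from" docs with fromQuery, return "to" docs sharing a join value
    Query joinQuery = JoinUtil.createJoinQuery(joinField,
        new TermQuery(new Term("name", "name2")),   // fromQuery
        new TermQuery(new Term("type", "price")),   // toQuery
        searcher, ScoreMode.None, ordinalMap);
    TopDocs hits = searcher.search(joinQuery, 10);
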
@@ -17,19 +17,6 @@ package org.apache.lucene.search.join;
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;

@@ -38,27 +25,29 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;

@@ -74,8 +63,22 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

public class TestJoinUtil extends LuceneTestCase {

  public void testSimple() throws Exception {

@@ -169,6 +172,180 @@ public class TestJoinUtil extends LuceneTestCase {
    dir.close();
  }

  public void testSimpleOrdinalsJoin() throws Exception {
    final String idField = "id";
    final String productIdField = "productId";
    // A field indicating to what type a document belongs, which is then used to distinguish between documents during joining.
    final String typeField = "type";
    // A single sorted doc values field that holds the join values for all document types.
    // Typically during indexing a schema will automatically create this field with the values.
    final String joinField = idField + productIdField;

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    // 0
    Document doc = new Document();
    doc.add(new TextField(idField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "random text", Field.Store.NO));
    doc.add(new TextField("name", "name1", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    // 1
    doc = new Document();
    doc.add(new TextField(productIdField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    // 2
    doc = new Document();
    doc.add(new TextField(productIdField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    // 3
    doc = new Document();
    doc.add(new TextField(idField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "more random text", Field.Store.NO));
    doc.add(new TextField("name", "name2", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);
    w.commit();

    // 4
    doc = new Document();
    doc.add(new TextField(productIdField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);

    // 5
    doc = new Document();
    doc.add(new TextField(productIdField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);

    IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
    w.close();

    IndexReader r = indexSearcher.getIndexReader();
    SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
    for (int i = 0; i < values.length; i++) {
      LeafReader leafReader = r.leaves().get(i).reader();
      values[i] = DocValues.getSorted(leafReader, joinField);
    }
    MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
        r.getCoreCacheKey(), values, PackedInts.DEFAULT
    );

    Query toQuery = new TermQuery(new Term(typeField, "price"));
    Query fromQuery = new TermQuery(new Term("name", "name2"));
    // Search for product and return prices
    Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    TopDocs result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(4, result.scoreDocs[0].doc);
    assertEquals(5, result.scoreDocs[1].doc);

    fromQuery = new TermQuery(new Term("name", "name1"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(1, result.scoreDocs[0].doc);
    assertEquals(2, result.scoreDocs[1].doc);

    // Search for prices and return products
    fromQuery = new TermQuery(new Term("price", "20.0"));
    toQuery = new TermQuery(new Term(typeField, "product"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(0, result.scoreDocs[0].doc);
    assertEquals(3, result.scoreDocs[1].doc);

    indexSearcher.getIndexReader().close();
    dir.close();
  }

  public void testRandomOrdinalsJoin() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
    );
    IndexIterationContext context = createContext(100, w, false, true);

    w.forceMerge(1);

    w.close();
    IndexReader topLevelReader = DirectoryReader.open(dir);

    SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
    for (LeafReaderContext leadContext : topLevelReader.leaves()) {
      values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
    }
    context.ordinalMap = MultiDocValues.OrdinalMap.build(
        topLevelReader.getCoreCacheKey(), values, PackedInts.DEFAULT
    );
    IndexSearcher indexSearcher = newSearcher(topLevelReader);

    int r = random().nextInt(context.randomUniqueValues.length);
    boolean from = context.randomFrom[r];
    String randomValue = context.randomUniqueValues[r];
    BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);

    final Query actualQuery = new TermQuery(new Term("value", randomValue));
    if (VERBOSE) {
      System.out.println("actualQuery=" + actualQuery);
    }
    final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
    if (VERBOSE) {
      System.out.println("scoreMode=" + scoreMode);
    }

    final Query joinQuery;
    if (from) {
      BooleanQuery fromQuery = new BooleanQuery();
      fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
      fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
      Query toQuery = new TermQuery(new Term("type", "to"));
      joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
    } else {
      BooleanQuery fromQuery = new BooleanQuery();
      fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
      fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
      Query toQuery = new TermQuery(new Term("type", "from"));
      joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
    }
    if (VERBOSE) {
      System.out.println("joinQuery=" + joinQuery);
    }

    final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
    final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
    indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
    assertBitSet(expectedResult, actualResult, indexSearcher);
    TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
    TopDocs actualTopDocs = topScoreDocCollector.topDocs();
    assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
    topLevelReader.close();
    dir.close();
  }

  // TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
  public void testOverflowTermsWithScoreCollector() throws Exception {
    test300spartans(true, ScoreMode.Avg);

@@ -448,7 +625,7 @@ public class TestJoinUtil extends LuceneTestCase {
        dir,
        newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
    );
    IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument);
    IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, false);

    IndexReader topLevelReader = w.getReader();
    w.close();

@@ -485,28 +662,20 @@ public class TestJoinUtil extends LuceneTestCase {
      // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
      final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
      final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
      indexSearcher.search(joinQuery, new Collector() {

        @Override
        public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
          final int docBase = context.docBase;
          final LeafCollector in = topScoreDocCollector.getLeafCollector(context);
          return new FilterLeafCollector(in) {

            @Override
            public void collect(int doc) throws IOException {
              super.collect(doc);
              actualResult.set(doc + docBase);
            }
          };
        }

        @Override
        public boolean needsScores() {
          return topScoreDocCollector.needsScores();
        }
      });
      indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
      // Asserting bit set...
      assertBitSet(expectedResult, actualResult, indexSearcher);
      // Asserting TopDocs...
      TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
      TopDocs actualTopDocs = topScoreDocCollector.topDocs();
      assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
    }
    topLevelReader.close();
    dir.close();
  }
}

  private void assertBitSet(BitSet expectedResult, BitSet actualResult, IndexSearcher indexSearcher) throws IOException {
    if (VERBOSE) {
      System.out.println("expected cardinality:" + expectedResult.cardinality());
      DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());

@@ -520,14 +689,13 @@ public class TestJoinUtil extends LuceneTestCase {
      }
    }
    assertEquals(expectedResult, actualResult);
  }

    // Asserting TopDocs...
    TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
    TopDocs actualTopDocs = topScoreDocCollector.topDocs();
  private void assertTopDocs(TopDocs expectedTopDocs, TopDocs actualTopDocs, ScoreMode scoreMode, IndexSearcher indexSearcher, Query joinQuery) throws IOException {
    assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
    assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
    if (scoreMode == ScoreMode.None) {
      continue;
      return;
    }

    assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);

@@ -542,16 +710,16 @@ public class TestJoinUtil extends LuceneTestCase {
      assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
    }
  }
    topLevelReader.close();
    dir.close();
  }

  private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument, boolean ordinalJoin) throws IOException {
    return createContext(nDocs, writer, writer, multipleValuesPerDocument, ordinalJoin);
  }

  private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument) throws IOException {
    return createContext(nDocs, writer, writer, multipleValuesPerDocument);
  private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
    if (globalOrdinalJoin) {
      assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
    }

  private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument) throws IOException {
    IndexIterationContext context = new IndexIterationContext();
    int numRandomValues = nDocs / 2;
    context.randomUniqueValues = new String[numRandomValues];

@@ -560,8 +728,8 @@ public class TestJoinUtil extends LuceneTestCase {
    for (int i = 0; i < numRandomValues; i++) {
      String uniqueRandomValue;
      do {
        uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
        // uniqueRandomValue = _TestUtil.randomSimpleString(random);
        // uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
        uniqueRandomValue = TestUtil.randomSimpleString(random());
      } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
      // Generate unique values and empty strings aren't allowed.
      trackSet.add(uniqueRandomValue);

@@ -581,15 +749,18 @@ public class TestJoinUtil extends LuceneTestCase {
      boolean from = context.randomFrom[randomI];
      int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
      docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
      if (globalOrdinalJoin) {
        document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
      }
      for (int j = 0; j < numberOfLinkValues; j++) {
        String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
        docs[i].linkValues.add(linkValue);
        if (from) {
          if (!context.fromDocuments.containsKey(linkValue)) {
            context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
            context.fromDocuments.put(linkValue, new ArrayList<>());
          }
          if (!context.randomValueFromDocs.containsKey(value)) {
            context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
            context.randomValueFromDocs.put(value, new ArrayList<>());
          }

          context.fromDocuments.get(linkValue).add(docs[i]);

@@ -600,12 +771,15 @@ public class TestJoinUtil extends LuceneTestCase {
          } else {
            document.add(new SortedDocValuesField("from", new BytesRef(linkValue)));
          }
          if (globalOrdinalJoin) {
            document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
          }
        } else {
          if (!context.toDocuments.containsKey(linkValue)) {
            context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
            context.toDocuments.put(linkValue, new ArrayList<>());
          }
          if (!context.randomValueToDocs.containsKey(value)) {
            context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
            context.randomValueToDocs.put(value, new ArrayList<>());
          }

          context.toDocuments.get(linkValue).add(docs[i]);

@@ -616,6 +790,9 @@ public class TestJoinUtil extends LuceneTestCase {
          } else {
            document.add(new SortedDocValuesField("to", new BytesRef(linkValue)));
          }
          if (globalOrdinalJoin) {
            document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
          }
        }
      }

@@ -707,6 +884,9 @@ public class TestJoinUtil extends LuceneTestCase {
        if (joinScore == null) {
          joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
        }
        if (VERBOSE) {
          System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
        }
        joinScore.addScore(scorer.score());
      }

@@ -875,6 +1055,7 @@ public class TestJoinUtil extends LuceneTestCase {
    Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
    Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();

    MultiDocValues.OrdinalMap ordinalMap;
  }

  private static class RandomDoc {

@@ -922,4 +1103,29 @@ public class TestJoinUtil extends LuceneTestCase {

  }

  private static class BitSetCollector extends SimpleCollector {

    private final BitSet bitSet;
    private int docBase;

    private BitSetCollector(BitSet bitSet) {
      this.bitSet = bitSet;
    }

    @Override
    public void collect(int doc) throws IOException {
      bitSet.set(docBase + doc);
    }

    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      docBase = context.docBase;
    }

    @Override
    public boolean needsScores() {
      return false;
    }
  }

}

@@ -1 +0,0 @@
0ec13f6423eb6d5858e229939a2bc118473ef94c

@@ -0,0 +1 @@
016d0bc512222f1253ee6b64d389c84e22f697f0

@@ -1 +0,0 @@
11393498b38e9695d0850cac26fde5613ae268b9

@@ -0,0 +1 @@
f5aa318bda4c6c8d688c9d00b90681dcd82ce636
@@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;

@@ -57,11 +56,9 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import org.junit.After;
import org.junit.Before;

@@ -158,10 +155,11 @@ public class SuggestFieldTest extends LuceneTestCase {
      weights[i] = Math.abs(random().nextLong());
      document.add(newSuggestField("suggest_field", "abc", weights[i]));
      iw.addDocument(document);
    }
    if (rarely()) {

      if (usually()) {
        iw.commit();
      }
    }

    DirectoryReader reader = iw.getReader();
    Entry[] expectedEntries = new Entry[num];

@@ -200,11 +198,15 @@ public class SuggestFieldTest extends LuceneTestCase {
      }
      iw.addDocument(document);
      document.clear();

      if (usually()) {
        iw.commit();
      }
    }

    iw.deleteDocuments(new Term("str_field", "delete"));

    DirectoryReader reader = DirectoryReader.open(iw, false);
    DirectoryReader reader = DirectoryReader.open(iw, true);
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
    TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", numLive);
    assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));

@@ -224,6 +226,10 @@ public class SuggestFieldTest extends LuceneTestCase {
      document.add(newStringField("str_fld", "deleted", Field.Store.NO));
      iw.addDocument(document);
      document.clear();

      if (usually()) {
        iw.commit();
      }
    }

    Filter filter = new QueryWrapperFilter(new TermsQuery("str_fld", new BytesRef("non_existent")));

@@ -249,11 +255,15 @@ public class SuggestFieldTest extends LuceneTestCase {
      document.add(newStringField("delete", "delete", Field.Store.NO));
      iw.addDocument(document);
      document.clear();

      if (usually()) {
        iw.commit();
      }
    }

    iw.deleteDocuments(new Term("delete", "delete"));

    DirectoryReader reader = DirectoryReader.open(iw, false);
    DirectoryReader reader = DirectoryReader.open(iw, true);
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
    TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", num);
    assertThat(suggest.totalHits, equalTo(0));

@@ -274,6 +284,10 @@ public class SuggestFieldTest extends LuceneTestCase {
      document.add(new IntField("weight_fld", i, Field.Store.YES));
      iw.addDocument(document);
      document.clear();

      if (usually()) {
        iw.commit();
      }
    }

    iw.deleteDocuments(NumericRangeQuery.newIntRange("weight_fld", 2, null, true, false));

@@ -298,6 +312,10 @@ public class SuggestFieldTest extends LuceneTestCase {
      document.add(new IntField("filter_int_fld", i, Field.Store.NO));
      iw.addDocument(document);
      document.clear();

      if (usually()) {
        iw.commit();
      }
    }

    DirectoryReader reader = iw.getReader();

@@ -542,6 +560,10 @@ public class SuggestFieldTest extends LuceneTestCase {
      document.add(newSuggestField("suggest_field", suggest, weight));
      mappings.put(suggest, weight);
      iw.addDocument(document);

      if (usually()) {
        iw.commit();
      }
    }

    DirectoryReader reader = iw.getReader();

@@ -263,6 +263,9 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase {
   * Both queries will be filtered by <code>filter</code>
   */
  protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception {
    QueryUtils.check(q1);
    QueryUtils.check(q2);

    if (filter != null) {
      q1 = new FilteredQuery(q1, filter);
      q2 = new FilteredQuery(q2, filter);

@@ -78,6 +78,9 @@ Detailed Change List
New Features
----------------------

* SOLR-6637: Solr should have a way to restore a core from a backed up index.
  (Varun Thacker, noble, shalin)

Bug Fixes
----------------------

@@ -90,6 +93,11 @@ Optimizations
* SOLR-7324: IndexFetcher does not need to call isIndexStale if a full copy is already needed.
  (Stephan Lagraulet via Varun Thacker)

Other Changes
----------------------

* SOLR-6865: Upgrade HttpClient to 4.4.1 (Shawn Heisey)

================== 5.1.0 ==================

Consult the LUCENE_CHANGES.txt file for additional, low-level changes in this release

@@ -29,6 +29,7 @@ import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;

@@ -246,31 +247,31 @@ public class IndexFetcher {
    }
  }

  boolean fetchLatestIndex(final SolrCore core, boolean forceReplication) throws IOException, InterruptedException {
    return fetchLatestIndex(core, forceReplication, false);
  boolean fetchLatestIndex(boolean forceReplication) throws IOException, InterruptedException {
    return fetchLatestIndex(forceReplication, false);
  }

  /**
   * This command downloads all the necessary files from master to install an index commit point. Only changed files are
   * downloaded. It also downloads the conf files (if they are modified).
   *
   * @param core the SolrCore
   * @param forceReplication force a replication in all cases
   * @param forceCoreReload force a core reload in all cases
   * @return true on success, false if slave is already in sync
   * @throws IOException if an exception occurs
   */
  boolean fetchLatestIndex(final SolrCore core, boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {
  boolean fetchLatestIndex(boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {

    boolean cleanupDone = false;
    boolean successfulInstall = false;
    replicationStartTime = System.currentTimeMillis();
    Directory tmpIndexDir = null;
    String tmpIndex = null;
    String tmpIndex;
    Directory indexDir = null;
    String indexDirPath = null;
    String indexDirPath;
    boolean deleteTmpIdxDir = true;

    if (!core.getSolrCoreState().getLastReplicateIndexSuccess()) {
    if (!solrCore.getSolrCoreState().getLastReplicateIndexSuccess()) {
      // if the last replication was not a success, we force a full replication
      // when we are a bit more confident we may want to try a partial replication
      // if the error is connection related or something, but we have to be careful

@@ -279,7 +280,7 @@ public class IndexFetcher {

    try {
      //get the current 'replicateable' index version in the master
      NamedList response = null;
      NamedList response;
      try {
        response = getLatestVersion();
      } catch (Exception e) {

@@ -290,12 +291,12 @@ public class IndexFetcher {
      long latestGeneration = (Long) response.get(GENERATION);

      // TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
      IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
      IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit();
      if (commit == null) {
        // Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points
        RefCounted<SolrIndexSearcher> searcherRefCounted = null;
        try {
          searcherRefCounted = core.getNewestSearcher(false);
          searcherRefCounted = solrCore.getNewestSearcher(false);
          if (searcherRefCounted == null) {
            LOG.warn("No open searcher found - fetch aborted");
            return false;

@@ -312,15 +313,14 @@ public class IndexFetcher {
      if (forceReplication && commit.getGeneration() != 0) {
        // since we won't get the files for an empty index,
        // we just clear ours and commit
        RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core);
        RefCounted<IndexWriter> iw = solrCore.getUpdateHandler().getSolrCoreState().getIndexWriter(solrCore);
        try {
          iw.get().deleteAll();
        } finally {
          iw.decref();
        }
        SolrQueryRequest req = new LocalSolrQueryRequest(core,
            new ModifiableSolrParams());
        core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
        SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams());
        solrCore.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
      }

      //there is nothing to be replicated

@@ -340,7 +340,9 @@ public class IndexFetcher {
      // get the list of files first
      fetchFileList(latestGeneration);
      // this can happen if the commit point is deleted before we fetch the file list.
      if(filesToDownload.isEmpty()) return false;
      if (filesToDownload.isEmpty()) {
        return false;
      }
      LOG.info("Number of files in latest index in master: " + filesToDownload.size());

      // Create the sync service

@@ -354,13 +356,13 @@ public class IndexFetcher {
          || commit.getGeneration() >= latestGeneration || forceReplication;

      String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
      tmpIndex = createTempindexDir(core, tmpIdxDirName);
      tmpIndex = Paths.get(solrCore.getDataDir(), tmpIdxDirName).toString();

      tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
      tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);

      // cindex dir...
      indexDirPath = core.getIndexDir();
      indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
      indexDirPath = solrCore.getIndexDir();
      indexDir = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);

      try {

@@ -404,7 +406,7 @@ public class IndexFetcher {
          } finally {
            writer.decref();
          }
          solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true);
          solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(solrCore, true);
        }
        boolean reloadCore = false;

@@ -422,7 +424,7 @@ public class IndexFetcher {
            reloadCore = true;
            downloadConfFiles(confFilesToDownload, latestGeneration);
            if (isFullCopyNeeded) {
              successfulInstall = modifyIndexProps(tmpIdxDirName);
              successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
              deleteTmpIdxDir = false;
            } else {
              successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);

@@ -433,8 +435,8 @@ public class IndexFetcher {
              // may be closed
              if (indexDir != null) {
                LOG.info("removing old index directory " + indexDir);
                core.getDirectoryFactory().doneWithDirectory(indexDir);
                core.getDirectoryFactory().remove(indexDir);
                solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
                solrCore.getDirectoryFactory().remove(indexDir);
              }
            }

@@ -446,7 +448,7 @@ public class IndexFetcher {
          } else {
            terminateAndWaitFsyncService();
            if (isFullCopyNeeded) {
              successfulInstall = modifyIndexProps(tmpIdxDirName);
              successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
              deleteTmpIdxDir = false;
            } else {
              successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);

@@ -458,13 +460,13 @@ public class IndexFetcher {
          }
        } finally {
          if (!isFullCopyNeeded) {
            solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core);
            solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(solrCore);
          }
        }

        // we must reload the core after we open the IW back up
        if (successfulInstall && (reloadCore || forceCoreReload)) {
          LOG.info("Reloading SolrCore {}", core.getName());
          LOG.info("Reloading SolrCore {}", solrCore.getName());
          reloadCore();
        }

@@ -474,8 +476,8 @@ public class IndexFetcher {
          // may be closed
          if (indexDir != null) {
            LOG.info("removing old index directory " + indexDir);
            core.getDirectoryFactory().doneWithDirectory(indexDir);
            core.getDirectoryFactory().remove(indexDir);
            solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
            solrCore.getDirectoryFactory().remove(indexDir);
          }
        }
        if (isFullCopyNeeded) {

@@ -486,13 +488,13 @@ public class IndexFetcher {
        }

        if (!isFullCopyNeeded && !forceReplication && !successfulInstall) {
          cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
          cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
          cleanupDone = true;
          // we try with a full copy of the index
          LOG.warn(
              "Replication attempt was not successful - trying a full index replication reloadCore={}",
              reloadCore);
          successfulInstall = fetchLatestIndex(core, true, reloadCore);
          successfulInstall = fetchLatestIndex(true, reloadCore);
        }

        replicationStartTime = 0;

@@ -505,11 +507,11 @@ public class IndexFetcher {
      } catch (InterruptedException e) {
        throw new InterruptedException("Index fetch interrupted");
      } catch (Exception e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
        throw new SolrException(ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
      }
    } finally {
      if (!cleanupDone) {
        cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
        cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
      }
    }
  }

@@ -719,15 +721,6 @@ public class IndexFetcher {

  }

  /**
   * All the files are copied to a temp dir first
   */
  private String createTempindexDir(SolrCore core, String tmpIdxDirName) {
    // TODO: there should probably be a DirectoryFactory#concatPath(parent, name)
    // or something
    return core.getDataDir() + tmpIdxDirName;
  }

  private void reloadCore() {
    final CountDownLatch latch = new CountDownLatch(1);
    new Thread() {

@@ -815,12 +808,12 @@ public class IndexFetcher {
        || filename.startsWith("segments_") || size < _100K);
  }

  static class CompareResult {
  protected static class CompareResult {
    boolean equal = false;
    boolean checkSummed = false;
  }

  private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
  protected static CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
    CompareResult compareResult = new CompareResult();
    try {
      try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {

@@ -887,8 +880,8 @@ public class IndexFetcher {
  }

  /**
   * All the files which are common between master and slave must have the same size, else we assume they are
   * not compatible (stale).
   * All the files which are common between master and slave must have the same size and the same checksum, else we
   * assume they are not compatible (stale).
   *
   * @return true if the index is stale and we need to download a fresh copy, false otherwise.
   * @throws IOException if low level io error

@@ -1034,7 +1027,7 @@ public class IndexFetcher {
  /**
   * If the index is stale by any chance, load index from a different dir in the data dir.
   */
  private boolean modifyIndexProps(String tmpIdxDirName) {
  protected static boolean modifyIndexProps(SolrCore solrCore, String tmpIdxDirName) {
    LOG.info("New index installed. Updating index properties... index=" + tmpIdxDirName);
    Properties p = new Properties();
    Directory dir = null;

@@ -0,0 +1,50 @@
package org.apache.solr.handler;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

class OldBackupDirectory implements Comparable<OldBackupDirectory> {
  File dir;
  Date timestamp;
  private final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");

  OldBackupDirectory(File dir) {
    if (dir.isDirectory()) {
      Matcher m = dirNamePattern.matcher(dir.getName());
      if (m.find()) {
        try {
          this.dir = dir;
          this.timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).parse(m.group(1));
        } catch (Exception e) {
          this.dir = null;
          this.timestamp = null;
        }
      }
    }
  }

  @Override
  public int compareTo(OldBackupDirectory that) {
    return that.timestamp.compareTo(this.timestamp);
  }
}
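Worth noting for reviewers: compareTo above is deliberately reversed (that.timestamp against this.timestamp), so a plain Collections.sort() orders the newest snapshot first, and the restore path below relies on dirs.get(0) being the most recent. A small sketch of that interaction, mirroring the restore() code that follows; dataDir is an illustrative variable.

    // only well-formed snapshot.<timestamp> directories get a non-null dir
    File[] files = new File(dataDir).listFiles();
    List<OldBackupDirectory> dirs = new ArrayList<>();
    for (File f : files) {
      OldBackupDirectory obd = new OldBackupDirectory(f);
      if (obd.dir != null) {
        dirs.add(obd);
      }
    }
    Collections.sort(dirs);   // reversed compareTo: newest timestamp first
    String newest = dirs.get(0).dir.getName();
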
@ -36,7 +36,9 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
@@ -146,6 +148,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
 
   private ReentrantLock indexFetchLock = new ReentrantLock();
 
+  private ExecutorService restoreExecutor = Executors.newSingleThreadExecutor(
+      new DefaultSolrThreadFactory("restoreExecutor"));
+
+  private volatile Future<Boolean> restoreFuture;
+
+  private volatile String currentRestoreName;
+
   private String includeConfFiles;
 
   private NamedList<String> confFileNameAlias = new NamedList<>();
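The reason for a single-threaded executor plus a Future: at most one restore can
run per core, and the same Future later doubles as the status handle. A
self-contained sketch of the pattern (a plain thread factory stands in for
DefaultSolrThreadFactory, and a trivial Callable stands in for RestoreCore):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

class RestoreExecutorSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService restoreExecutor = Executors.newSingleThreadExecutor();
    // Submitting a Callable<Boolean> mirrors RestoreCore.call(); a second
    // submit would queue behind the first instead of running concurrently.
    Future<Boolean> restoreFuture = restoreExecutor.submit(() -> Boolean.TRUE);
    System.out.println("restore succeeded: " + restoreFuture.get()); // blocks until done
    restoreExecutor.shutdown();
  }
}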
@@ -235,6 +244,11 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
     } else if (command.equalsIgnoreCase(CMD_BACKUP)) {
       doSnapShoot(new ModifiableSolrParams(solrParams), rsp, req);
       rsp.add(STATUS, OK_STATUS);
+    } else if (command.equalsIgnoreCase(CMD_RESTORE)) {
+      restore(new ModifiableSolrParams(solrParams), rsp, req);
+      rsp.add(STATUS, OK_STATUS);
+    } else if (command.equalsIgnoreCase(CMD_RESTORE_STATUS)) {
+      rsp.add(CMD_RESTORE_STATUS, getRestoreStatus());
     } else if (command.equalsIgnoreCase(CMD_DELETE_BACKUP)) {
       deleteSnapshot(new ModifiableSolrParams(solrParams));
       rsp.add(STATUS, OK_STATUS);
@@ -302,7 +316,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
       throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
     }
 
-    SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME));
+    SnapShooter snapShooter = new SnapShooter(core, params.get(LOCATION), params.get(NAME));
     snapShooter.validateDeleteSnapshot();
     snapShooter.deleteSnapAsync(this);
   }
@@ -361,7 +375,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
       } else {
         currentIndexFetcher = pollingIndexFetcher;
       }
-      return currentIndexFetcher.fetchLatestIndex(core, forceReplication);
+      return currentIndexFetcher.fetchLatestIndex(forceReplication);
     } catch (Exception e) {
       SolrException.log(LOG, "Index fetch failed ", e);
     } finally {
@@ -377,6 +391,72 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
     return indexFetchLock.isLocked();
   }
 
+  private void restore(SolrParams params, SolrQueryResponse rsp, SolrQueryRequest req) {
+    if (restoreFuture != null && !restoreFuture.isDone()) {
+      throw new SolrException(ErrorCode.BAD_REQUEST, "Restore in progress. Cannot run multiple restore operations" +
+          " for the same core");
+    }
+    String name = params.get(NAME);
+    String location = params.get(LOCATION);
+
+    //If location is not provided then assume that the restore index is present inside the data directory.
+    if (location == null) {
+      location = core.getDataDir();
+    }
+
+    //If name is not provided then look for the latest unnamed snapshot folder (the ones with the
+    //snapshot.<timestamp> format), since we allow snapshots to be taken without providing a name.
+    if (name == null) {
+      File[] files = new File(location).listFiles();
+      List<OldBackupDirectory> dirs = new ArrayList<>();
+      for (File f : files) {
+        OldBackupDirectory obd = new OldBackupDirectory(f);
+        if (obd.dir != null) {
+          dirs.add(obd);
+        }
+      }
+      Collections.sort(dirs);
+      if (dirs.size() == 0) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "No backup name specified and none found in " + core.getDataDir());
+      }
+      name = dirs.get(0).dir.getName();
+    } else {
+      //"snapshot." is prefixed by the snapshooter
+      name = "snapshot." + name;
+    }
+
+    RestoreCore restoreCore = new RestoreCore(core, location, name);
+    restoreFuture = restoreExecutor.submit(restoreCore);
+    currentRestoreName = name;
+  }
+
+  private NamedList<Object> getRestoreStatus() {
+    NamedList<Object> status = new SimpleOrderedMap<>();
+
+    if (restoreFuture == null) {
+      status.add(STATUS, "No restore actions in progress");
+      return status;
+    }
+
+    status.add("snapshotName", currentRestoreName);
+    if (restoreFuture.isDone()) {
+      try {
+        boolean success = restoreFuture.get();
+        if (success) {
+          status.add(STATUS, SUCCESS);
+        } else {
+          status.add(STATUS, FAILED);
+        }
+      } catch (Exception e) {
+        status.add(STATUS, FAILED);
+        status.add(EXCEPTION, e.getMessage());
+      }
+    } else {
+      status.add(STATUS, "In Progress");
+    }
+    return status;
+  }
+
   private void doSnapShoot(SolrParams params, SolrQueryResponse rsp,
       SolrQueryRequest req) {
     try {
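Putting the two new commands together from a client's point of view: kick off a
restore, then poll restorestatus until it reports success or failure. A hedged
sketch only; host, port, and core name are placeholders, and response parsing
is simplified to substring checks, as the tests below also do:

import java.io.InputStream;
import java.net.URL;
import org.apache.commons.io.IOUtils;

class RestoreClientSketch {
  public static void main(String[] args) throws Exception {
    String base = "http://localhost:8983/solr/collection1/replication";
    fetch(base + "?command=restore&name=mybackup");        // enqueue the restore
    String status;
    do {
      Thread.sleep(1000);
      status = fetch(base + "?command=restorestatus");     // poll until finished
    } while (status.contains("In Progress"));
    System.out.println(status);                            // contains "success" or "failed"
  }

  private static String fetch(String url) throws Exception {
    InputStream stream = null;
    try {
      stream = new URL(url).openStream();
      return IOUtils.toString(stream, "UTF-8");
    } finally {
      IOUtils.closeQuietly(stream);
    }
  }
}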
@@ -487,7 +567,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
         result.add(fileMeta);
       } catch (IOException e) {
         rsp.add("status", "unable to get file names for given index generation");
-        rsp.add("exception", e);
+        rsp.add(EXCEPTION, e);
         LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
       } finally {
         if (dir != null) {
@@ -1106,6 +1186,19 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
       @Override
       public void postClose(SolrCore core) {}
     });
+
+    core.addCloseHook(new CloseHook() {
+      @Override
+      public void preClose(SolrCore core) {
+        ExecutorUtil.shutdownNowAndAwaitTermination(restoreExecutor);
+        if (restoreFuture != null) {
+          restoreFuture.cancel(true);
+        }
+      }
+
+      @Override
+      public void postClose(SolrCore core) {}
+    });
   }
 
   /**
@@ -1407,6 +1500,14 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
     return result;
   }
 
+  private static final String LOCATION = "location";
+
+  private static final String SUCCESS = "success";
+
+  private static final String FAILED = "failed";
+
+  private static final String EXCEPTION = "exception";
+
   public static final String MASTER_URL = "masterUrl";
 
   public static final String STATUS = "status";
@@ -1417,6 +1518,10 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware {
 
   public static final String CMD_BACKUP = "backup";
 
+  public static final String CMD_RESTORE = "restore";
+
+  public static final String CMD_RESTORE_STATUS = "restorestatus";
+
   public static final String CMD_FETCH_INDEX = "fetchindex";
 
   public static final String CMD_ABORT_FETCH = "abortfetch";
@@ -0,0 +1,149 @@
+package org.apache.solr.handler;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Future;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.core.DirectoryFactory;
+import org.apache.solr.core.SolrCore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class RestoreCore implements Callable<Boolean> {
+
+  private static final Logger log = LoggerFactory.getLogger(RestoreCore.class.getName());
+
+  private final String backupName;
+  private final String backupLocation;
+  private final SolrCore core;
+
+  public RestoreCore(SolrCore core, String location, String name) {
+    this.core = core;
+    this.backupLocation = location;
+    this.backupName = name;
+  }
+
+  @Override
+  public Boolean call() throws Exception {
+    return doRestore();
+  }
+
+  private boolean doRestore() throws Exception {
+
+    Path backupPath = Paths.get(backupLocation, backupName);
+    String restoreIndexName = "restore." + backupName;
+    Path restoreIndexPath = Paths.get(core.getDataDir(), restoreIndexName);
+
+    Directory restoreIndexDir = null;
+    Directory indexDir = null;
+    try (Directory backupDir = FSDirectory.open(backupPath)) {
+
+      restoreIndexDir = core.getDirectoryFactory().get(restoreIndexPath.toString(),
+          DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
+
+      //Prefer local copy.
+      indexDir = core.getDirectoryFactory().get(core.getIndexDir(),
+          DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
+
+      //Move all files from backupDir to restoreIndexDir
+      for (String filename : backupDir.listAll()) {
+        checkInterrupted();
+        log.info("Copying over file to restore directory " + filename);
+        try (IndexInput indexInput = backupDir.openInput(filename, IOContext.READONCE)) {
+          long checksum = CodecUtil.retrieveChecksum(indexInput);
+          long length = indexInput.length();
+          IndexFetcher.CompareResult compareResult = IndexFetcher.compareFile(indexDir, filename, length, checksum);
+          if (!compareResult.equal || (!compareResult.checkSummed && (filename.endsWith(".si")
+              || filename.endsWith(".liv") || filename.startsWith("segments_")))) {
+            restoreIndexDir.copyFrom(backupDir, filename, filename, IOContext.READONCE);
+          } else {
+            //prefer local copy
+            restoreIndexDir.copyFrom(indexDir, filename, filename, IOContext.READONCE);
+          }
+        } catch (Exception e) {
+          throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
+        }
+      }
+      log.debug("Switching directories");
+      IndexFetcher.modifyIndexProps(core, restoreIndexName);
+
+      boolean success;
+      try {
+        core.getUpdateHandler().newIndexWriter(false);
+        openNewSearcher();
+        success = true;
+        log.info("Successfully restored to the backup index");
+      } catch (Exception e) {
+        //Roll back to the old index directory. Delete the restore index directory and mark the restore as failed.
+        log.info("Could not switch to the restored index. Rolling back to the current index");
+        Directory dir = null;
+        try {
+          dir = core.getDirectoryFactory().get(core.getDataDir(), DirectoryFactory.DirContext.META_DATA,
+              core.getSolrConfig().indexConfig.lockType);
+          dir.deleteFile(IndexFetcher.INDEX_PROPERTIES);
+        } finally {
+          if (dir != null) {
+            core.getDirectoryFactory().release(dir);
+          }
+        }
+
+        core.getDirectoryFactory().doneWithDirectory(restoreIndexDir);
+        core.getDirectoryFactory().remove(restoreIndexDir);
+        core.getUpdateHandler().newIndexWriter(false);
+        openNewSearcher();
+        throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
+      }
+      if (success) {
+        core.getDirectoryFactory().doneWithDirectory(indexDir);
+        core.getDirectoryFactory().remove(indexDir);
+      }
+
+      return true;
+    } finally {
+      if (restoreIndexDir != null) {
+        core.getDirectoryFactory().release(restoreIndexDir);
+      }
+      if (indexDir != null) {
+        core.getDirectoryFactory().release(indexDir);
+      }
+    }
+  }
+
+  private void checkInterrupted() throws InterruptedException {
+    if (Thread.currentThread().isInterrupted()) {
+      throw new InterruptedException("Stopping restore process. Thread was interrupted.");
+    }
+  }
+
+  private void openNewSearcher() throws Exception {
+    Future[] waitSearcher = new Future[1];
+    core.getSearcher(true, false, waitSearcher, true);
+    if (waitSearcher[0] != null) {
+      waitSearcher[0].get();
+    }
+  }
+}
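RestoreCore leans on per-file checksums to decide what to copy, and the same
Lucene API can sanity-check a backup before any restore is attempted. An
illustrative pre-flight check, not part of the patch, assuming every file in
the backup carries a codec footer (true for indexes written by recent Lucene):

import java.nio.file.Paths;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

class VerifyBackupSketch {
  public static void main(String[] args) throws Exception {
    try (Directory backupDir = FSDirectory.open(Paths.get(args[0]))) {
      for (String file : backupDir.listAll()) {
        try (IndexInput in = backupDir.openInput(file, IOContext.READONCE)) {
          CodecUtil.checksumEntireFile(in); // throws CorruptIndexException on mismatch
        }
      }
      System.out.println("backup files pass their checksums");
    }
  }
}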
@@ -18,6 +18,7 @@ package org.apache.solr.handler;
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.file.Paths;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -58,10 +59,11 @@ public class SnapShooter {
 
   public SnapShooter(SolrCore core, String location, String snapshotName) {
     solrCore = core;
-    if (location == null) snapDir = core.getDataDir();
+    if (location == null) {
+      snapDir = core.getDataDir();
+    }
     else {
-      File base = new File(core.getCoreDescriptor().getInstanceDir());
-      snapDir = org.apache.solr.util.FileUtils.resolvePath(base, location).getAbsolutePath();
+      snapDir = Paths.get(core.getCoreDescriptor().getInstanceDir()).resolve(location).toAbsolutePath().toString();
     }
     this.snapshotName = snapshotName;
 
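The Paths.get(...).resolve(location) idiom is worth spelling out: Path#resolve
returns its argument unchanged when the argument is absolute, so an absolute
location overrides the instance dir while a relative one nests under it. A
small illustration (the paths are hypothetical):

import java.nio.file.Path;
import java.nio.file.Paths;

class ResolveSketch {
  public static void main(String[] args) {
    Path instanceDir = Paths.get("/var/solr/collection1");
    System.out.println(instanceDir.resolve("backups"));      // /var/solr/collection1/backups
    System.out.println(instanceDir.resolve("/mnt/backups")); // /mnt/backups (absolute wins)
  }
}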
@@ -125,7 +127,7 @@ public class SnapShooter {
   }
 
   void createSnapshot(final IndexCommit indexCommit, ReplicationHandler replicationHandler) {
-    LOG.info("Creating backup snapshot...");
+    LOG.info("Creating backup snapshot " + (snapshotName == null ? "<not named>" : snapshotName));
     NamedList<Object> details = new NamedList<>();
     details.add("startTime", new Date().toString());
     try {
@@ -193,31 +195,6 @@ public class SnapShooter {
     replicationHandler.snapShootDetails = details;
   }
 
-  private class OldBackupDirectory implements Comparable<OldBackupDirectory> {
-    File dir;
-    Date timestamp;
-    final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
-
-    OldBackupDirectory(File dir) {
-      if (dir.isDirectory()) {
-        Matcher m = dirNamePattern.matcher(dir.getName());
-        if (m.find()) {
-          try {
-            this.dir = dir;
-            this.timestamp = new SimpleDateFormat(DATE_FMT, Locale.ROOT).parse(m.group(1));
-          } catch (Exception e) {
-            this.dir = null;
-            this.timestamp = null;
-          }
-        }
-      }
-    }
-    @Override
-    public int compareTo(OldBackupDirectory that) {
-      return that.timestamp.compareTo(this.timestamp);
-    }
-  }
-
   public static final String DATE_FMT = "yyyyMMddHHmmssSSS";
 
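DATE_FMT is what ties SnapShooter and the new top-level OldBackupDirectory
together: unnamed backups go into directories named "snapshot." plus a DATE_FMT
timestamp, which is exactly what the "^snapshot[.](.*)$" pattern parses back.
An illustration of the names being produced (the constant is inlined here so
the snippet stands alone):

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

class SnapshotNameSketch {
  public static void main(String[] args) {
    // Same pattern as SnapShooter.DATE_FMT.
    String stamp = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.ROOT).format(new Date());
    System.out.println("snapshot." + stamp); // e.g. snapshot.20150405103000123
  }
}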
@@ -121,7 +121,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
   @Test
   public void testBackupOnCommit() throws Exception {
     //Index
-    int nDocs = indexDocs();
+    int nDocs = indexDocs(masterClient);
 
     //Confirm if completed
     CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient);
@@ -146,7 +146,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
     }
   }
 
-  private int indexDocs() throws IOException, SolrServerException {
+  protected static int indexDocs(SolrClient masterClient) throws IOException, SolrServerException {
     int nDocs = TestUtil.nextInt(random(), 1, 100);
     masterClient.deleteByQuery("*:*");
     for (int i = 0; i < nDocs; i++) {
@@ -164,7 +164,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
   @Test
   public void doTestBackup() throws Exception {
 
-    int nDocs = indexDocs();
+    int nDocs = indexDocs(masterClient);
 
     Path[] snapDir = new Path[5]; //One extra for the backup on commit
     //First snapshot location
@@ -180,17 +180,16 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
       backupNames = new String[4];
     }
     for (int i = 0; i < 4; i++) {
-      BackupCommand backupCommand;
       final String backupName = TestUtil.randomSimpleString(random(), 1, 20);
       if (!namedBackup) {
-        backupCommand = new BackupCommand(addNumberToKeepInRequest, backupKeepParamName, ReplicationHandler.CMD_BACKUP);
+        if (addNumberToKeepInRequest) {
+          runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&" + backupKeepParamName + "=2");
+        } else {
+          runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "");
+        }
       } else {
-        backupCommand = new BackupCommand(backupName, ReplicationHandler.CMD_BACKUP);
-        backupNames[i] = backupName;
-      }
-      backupCommand.runCommand();
-      if (backupCommand.fail != null) {
-        fail(backupCommand.fail);
+        runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&name=" + backupName);
+        backupNames[i] = backupName;
       }
 
       CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, firstBackupTimestamp);
@@ -253,8 +252,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
   private void testDeleteNamedBackup(String backupNames[]) throws InterruptedException, IOException {
     String lastTimestamp = null;
     for (int i = 0; i < 2; i++) {
-      BackupCommand deleteBackupCommand = new BackupCommand(backupNames[i], ReplicationHandler.CMD_DELETE_BACKUP);
-      deleteBackupCommand.runCommand();
+      runBackupCommand(masterJetty, ReplicationHandler.CMD_DELETE_BACKUP, "&name=" + backupNames[i]);
       CheckDeleteBackupStatus checkDeleteBackupStatus = new CheckDeleteBackupStatus(backupNames[i], lastTimestamp);
       while (true) {
         boolean success = checkDeleteBackupStatus.fetchStatus();
@@ -267,53 +265,20 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
         }
         Thread.sleep(200);
       }
-
-      if (deleteBackupCommand.fail != null) {
-        fail(deleteBackupCommand.fail);
-      }
     }
   }
 
-  private class BackupCommand {
-    String fail = null;
-    final boolean addNumberToKeepInRequest;
-    String backupKeepParamName;
-    String backupName;
-    String cmd;
-
-    BackupCommand(boolean addNumberToKeepInRequest, String backupKeepParamName, String command) {
-      this.addNumberToKeepInRequest = addNumberToKeepInRequest;
-      this.backupKeepParamName = backupKeepParamName;
-      this.cmd = command;
-    }
-    BackupCommand(String backupName, String command) {
-      this.backupName = backupName;
-      addNumberToKeepInRequest = false;
-      this.cmd = command;
-    }
-
-    public void runCommand() {
-      String masterUrl;
-      if (backupName != null) {
-        masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd +
-            "&name=" + backupName;
-      } else {
-        masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd +
-            (addNumberToKeepInRequest ? "&" + backupKeepParamName + "=2" : "");
-      }
-
+  public static void runBackupCommand(JettySolrRunner masterJetty, String cmd, String params) throws IOException {
+    String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME
+        + "/replication?command=" + cmd + params;
     InputStream stream = null;
     try {
      URL url = new URL(masterUrl);
       stream = url.openStream();
       stream.close();
-    } catch (Exception e) {
-      fail = e.getMessage();
     } finally {
       IOUtils.closeQuietly(stream);
     }
-
   }
-  }
 
   private class CheckDeleteBackupStatus {
@@ -349,6 +314,6 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
         IOUtils.closeQuietly(stream);
       }
       return false;
    };
  }
}
@@ -0,0 +1,243 @@
+package org.apache.solr.handler;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.regex.Pattern;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.SolrJettyTestBase;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.util.FileUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+@SolrTestCaseJ4.SuppressSSL     // Currently unknown why SSL does not work with this test
+public class TestRestoreCore extends SolrJettyTestBase {
+
+  JettySolrRunner masterJetty;
+  TestReplicationHandler.SolrInstance master = null;
+  SolrClient masterClient;
+
+  private static final String CONF_DIR = "solr" + File.separator + "collection1" + File.separator + "conf"
+      + File.separator;
+
+  private static String context = "/solr";
+
+  private static JettySolrRunner createJetty(TestReplicationHandler.SolrInstance instance) throws Exception {
+    FileUtils.copyFile(new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"), new File(instance.getHomeDir(), "solr.xml"));
+    JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), "/solr", 0);
+    jetty.setDataDir(instance.getDataDir());
+    jetty.start();
+    return jetty;
+  }
+
+  private static SolrClient createNewSolrClient(int port) {
+    try {
+      // setup the client...
+      HttpSolrClient client = new HttpSolrClient(buildUrl(port, context) + "/" + DEFAULT_TEST_CORENAME);
+      client.setConnectionTimeout(15000);
+      client.setSoTimeout(60000);
+      client.setDefaultMaxConnectionsPerHost(100);
+      client.setMaxTotalConnections(100);
+      return client;
+    }
+    catch (Exception ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    String configFile = "solrconfig-master.xml";
+
+    master = new TestReplicationHandler.SolrInstance(createTempDir("solr-instance").toFile(), "master", null);
+    master.setUp();
+    master.copyConfigFile(CONF_DIR + configFile, "solrconfig.xml");
+
+    masterJetty = createJetty(master);
+    masterClient = createNewSolrClient(masterJetty.getLocalPort());
+  }
+
+  @Override
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
+    masterClient.close();
+    masterClient = null;
+    masterJetty.stop();
+    masterJetty = null;
+    master = null;
+  }
+
+  @Test
+  public void testSimpleRestore() throws Exception {
+
+    int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
+
+    String snapshotName;
+    String location;
+    String params = "";
+
+    //Use the default backup location or an externally provided location.
+    if (random().nextBoolean()) {
+      location = createTempDir().toFile().getAbsolutePath();
+      params += "&location=" + URLEncoder.encode(location, "UTF-8");
+    }
+
+    //named snapshot vs default snapshot name
+    if (random().nextBoolean()) {
+      snapshotName = TestUtil.randomSimpleString(random(), 1, 5);
+      params += "&name=" + snapshotName;
+    }
+
+    TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params);
+
+    CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null);
+    while (!checkBackupStatus.success) {
+      checkBackupStatus.fetchStatus();
+      Thread.sleep(1000);
+    }
+
+    //Modify the existing index before we call restore.
+
+    //Delete a few docs
+    int numDeletes = TestUtil.nextInt(random(), 1, nDocs);
+    for (int i = 0; i < numDeletes; i++) {
+      masterClient.deleteByQuery("id:" + i);
+    }
+    masterClient.commit();
+
+    //Add a few more
+    int moreAdds = TestUtil.nextInt(random(), 1, 100);
+    for (int i = 0; i < moreAdds; i++) {
+      SolrInputDocument doc = new SolrInputDocument();
+      doc.addField("id", i + nDocs);
+      doc.addField("name", "name = " + (i + nDocs));
+      masterClient.add(doc);
+    }
+    //Purposely not always calling commit, so there can be some docs which are not committed
+    if (usually()) {
+      masterClient.commit();
+    }
+
+    TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_RESTORE, params);
+
+    while (!fetchRestoreStatus()) {
+      Thread.sleep(1000);
+    }
+
+    //See if the restore was successful by checking if all the docs are present again
+    verifyDocs(nDocs);
+  }
+
+  @Test
+  public void testFailedRestore() throws Exception {
+    int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
+
+    String location = createTempDir().toFile().getAbsolutePath();
+    String snapshotName = TestUtil.randomSimpleString(random(), 1, 5);
+    String params = "&name=" + snapshotName + "&location=" + URLEncoder.encode(location, "UTF-8");
+
+    TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params);
+
+    CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null);
+    while (!checkBackupStatus.success) {
+      checkBackupStatus.fetchStatus();
+      Thread.sleep(1000);
+    }
+
+    //Remove the segments_n file so that the backup index is corrupted.
+    //Restore should fail and it should automatically roll back to the original index.
+    Path restoreIndexPath = Paths.get(location, "snapshot." + snapshotName);
+    Path segmentFileName = Files.newDirectoryStream(restoreIndexPath, IndexFileNames.SEGMENTS + "*").iterator().next();
+    Files.delete(segmentFileName);
+
+    TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_RESTORE, params);
+
+    try {
+      while (!fetchRestoreStatus()) {
+        Thread.sleep(1000);
+      }
+      fail("Should have thrown an error because the restore could not have been successful");
+    } catch (AssertionError e) {
+      //supposed to happen
+    }
+
+    verifyDocs(nDocs);
+
+    //make sure we can write to the index again
+    nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
+    verifyDocs(nDocs);
+
+  }
+
+  private void verifyDocs(int nDocs) throws SolrServerException, IOException {
+    ModifiableSolrParams queryParams = new ModifiableSolrParams();
+    queryParams.set("q", "*:*");
+    QueryResponse response = masterClient.query(queryParams);
+
+    assertEquals(0, response.getStatus());
+    assertEquals(nDocs, response.getResults().getNumFound());
+  }
+
+  private boolean fetchRestoreStatus() throws IOException {
+    String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME +
+        "/replication?command=" + ReplicationHandler.CMD_RESTORE_STATUS;
+    final Pattern pException = Pattern.compile("<str name=\"exception\">(.*?)</str>");
+
+    InputStream stream = null;
+    try {
+      URL url = new URL(masterUrl);
+      stream = url.openStream();
+      String response = IOUtils.toString(stream, "UTF-8");
+      if (pException.matcher(response).find()) {
+        fail("Failed to complete restore action");
+      }
+      if (response.contains("<str name=\"status\">success</str>")) {
+        return true;
+      } else if (response.contains("<str name=\"status\">failed</str>")) {
+        fail("Restore Failed");
+      }
+      stream.close();
+    } finally {
+      IOUtils.closeQuietly(stream);
+    }
+    return false;
+  }
+}
@@ -1 +0,0 @@
-0ec13f6423eb6d5858e229939a2bc118473ef94c
@@ -0,0 +1 @@
+016d0bc512222f1253ee6b64d389c84e22f697f0
@@ -1 +0,0 @@
-11393498b38e9695d0850cac26fde5613ae268b9
@@ -0,0 +1 @@
+f5aa318bda4c6c8d688c9d00b90681dcd82ce636
@@ -1 +0,0 @@
-f7899276dddd01d8a42ecfe27e7031fcf9824422
@@ -0,0 +1 @@
+2f8757f5ac5e38f46c794e5229d1f3c522e9b1df