mirror of https://github.com/apache/lucene.git
merge trunk up to r1671137
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene6271@1671151 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
363fc49258
|
@ -40,10 +40,15 @@ API Changes
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-6308: Span queries now share document conjunction/intersection
|
* LUCENE-6308, LUCENE-6385, LUCENE-6391: Span queries now share
|
||||||
|
document conjunction/intersection
|
||||||
code with boolean queries, and use two-phased iterators for
|
code with boolean queries, and use two-phased iterators for
|
||||||
faster intersection by avoiding loading positions in certain cases.
|
faster intersection by avoiding loading positions in certain cases.
|
||||||
(Paul Elschot, Robert Muir via Mike McCandless)
|
(Paul Elschot, Terry Smith, Robert Muir via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-6352: Added a new query time join to the join module that uses
|
||||||
|
global ordinals, which is faster for subsequent joins between reopens.
|
||||||
|
(Martijn van Groningen, Adrien Grand)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
|
@ -52,6 +57,9 @@ Optimizations
|
||||||
faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
|
faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
|
||||||
Grand, Mike McCandless)
|
Grand, Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-6388: Optimize SpanNearQuery when payloads are not present.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.
|
* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.
|
||||||
|
@ -123,6 +131,10 @@ Bug Fixes
|
||||||
DocumentsWriterStallControl to prevent hangs during indexing if we
|
DocumentsWriterStallControl to prevent hangs during indexing if we
|
||||||
miss a .notify/All somewhere (Mike McCandless)
|
miss a .notify/All somewhere (Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-6386: Correct IndexWriter.forceMerge documentation to state
|
||||||
|
that up to 3X (X = current index size) spare disk space may be needed
|
||||||
|
to complete forceMerge(1). (Robert Muir, Shai Erera, Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
|
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
|
||||||
|
|
|
@ -1547,14 +1547,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
* longer be changed).</p>
|
* longer be changed).</p>
|
||||||
*
|
*
|
||||||
* <p>Note that this requires free space that is proportional
|
* <p>Note that this requires free space that is proportional
|
||||||
* to the size of the index in your Directory (2X if you're
|
* to the size of the index in your Directory: 2X if you are
|
||||||
* using compound file format). For example, if your index
|
* not using compound file format, and 3X if you are.
|
||||||
* size is 10 MB then you need an additional 10 MB free for
|
* For example, if your index size is 10 MB then you need
|
||||||
* this to complete (20 MB if you're using compound file
|
* an additional 20 MB free for this to complete (30 MB if
|
||||||
* format). This is also affected by the {@link Codec} that
|
* you're using compound file format). This is also affected
|
||||||
* is used to execute the merge, and may result in even a
|
* by the {@link Codec} that is used to execute the merge,
|
||||||
* bigger index. Also, it's best to call {@link #commit()}
|
* and may result in even a bigger index. Also, it's best
|
||||||
* afterwards, to allow IndexWriter to free up disk space.</p>
|
* to call {@link #commit()} afterwards, to allow IndexWriter
|
||||||
|
* to free up disk space.</p>
|
||||||
*
|
*
|
||||||
* <p>If some but not all readers re-open while merging
|
* <p>If some but not all readers re-open while merging
|
||||||
* is underway, this will cause {@code > 2X} temporary
|
* is underway, this will cause {@code > 2X} temporary
|
||||||
|
|
|
@ -232,8 +232,8 @@ public class PayloadNearQuery extends SpanNearQuery {
|
||||||
scratch.bytes = thePayload;
|
scratch.bytes = thePayload;
|
||||||
scratch.offset = 0;
|
scratch.offset = 0;
|
||||||
scratch.length = thePayload.length;
|
scratch.length = thePayload.length;
|
||||||
payloadScore = function.currentScore(doc, fieldName, start, end,
|
payloadScore = function.currentScore(docID(), fieldName, start, end,
|
||||||
payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
|
payloadsSeen, payloadScore, docScorer.computePayloadFactor(docID(),
|
||||||
spans.startPosition(), spans.endPosition(), scratch));
|
spans.startPosition(), spans.endPosition(), scratch));
|
||||||
++payloadsSeen;
|
++payloadsSeen;
|
||||||
}
|
}
|
||||||
|
@ -241,7 +241,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
||||||
|
|
||||||
//
|
//
|
||||||
@Override
|
@Override
|
||||||
protected boolean setFreqCurrentDoc() throws IOException {
|
protected void setFreqCurrentDoc() throws IOException {
|
||||||
freq = 0.0f;
|
freq = 0.0f;
|
||||||
payloadScore = 0;
|
payloadScore = 0;
|
||||||
payloadsSeen = 0;
|
payloadsSeen = 0;
|
||||||
|
@ -255,14 +255,12 @@ public class PayloadNearQuery extends SpanNearQuery {
|
||||||
getPayloads(spansArr);
|
getPayloads(spansArr);
|
||||||
startPos = spans.nextStartPosition();
|
startPos = spans.nextStartPosition();
|
||||||
} while (startPos != Spans.NO_MORE_POSITIONS);
|
} while (startPos != Spans.NO_MORE_POSITIONS);
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float score() throws IOException {
|
public float scoreCurrentDoc() throws IOException {
|
||||||
|
return super.scoreCurrentDoc()
|
||||||
return super.score()
|
* function.docScore(docID(), fieldName, payloadsSeen, payloadScore);
|
||||||
* function.docScore(doc, fieldName, payloadsSeen, payloadScore);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -99,7 +99,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean setFreqCurrentDoc() throws IOException {
|
protected void setFreqCurrentDoc() throws IOException {
|
||||||
freq = 0.0f;
|
freq = 0.0f;
|
||||||
numMatches = 0;
|
numMatches = 0;
|
||||||
payloadScore = 0;
|
payloadScore = 0;
|
||||||
|
@ -115,7 +115,6 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
|
|
||||||
startPos = spans.nextStartPosition();
|
startPos = spans.nextStartPosition();
|
||||||
} while (startPos != Spans.NO_MORE_POSITIONS);
|
} while (startPos != Spans.NO_MORE_POSITIONS);
|
||||||
return freq != 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void processPayload(Similarity similarity) throws IOException {
|
protected void processPayload(Similarity similarity) throws IOException {
|
||||||
|
@ -123,11 +122,11 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
final PostingsEnum postings = termSpans.getPostings();
|
final PostingsEnum postings = termSpans.getPostings();
|
||||||
payload = postings.getPayload();
|
payload = postings.getPayload();
|
||||||
if (payload != null) {
|
if (payload != null) {
|
||||||
payloadScore = function.currentScore(doc, term.field(),
|
payloadScore = function.currentScore(docID(), term.field(),
|
||||||
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
|
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
|
||||||
docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
|
docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload));
|
||||||
} else {
|
} else {
|
||||||
payloadScore = function.currentScore(doc, term.field(),
|
payloadScore = function.currentScore(docID(), term.field(),
|
||||||
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
|
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
|
||||||
}
|
}
|
||||||
payloadsSeen++;
|
payloadsSeen++;
|
||||||
|
@ -143,8 +142,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
* @throws IOException if there is a low-level I/O error
|
* @throws IOException if there is a low-level I/O error
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public float score() throws IOException {
|
public float scoreCurrentDoc() throws IOException {
|
||||||
|
|
||||||
return includeSpanScore ? getSpanScore() * getPayloadScore()
|
return includeSpanScore ? getSpanScore() * getPayloadScore()
|
||||||
: getPayloadScore();
|
: getPayloadScore();
|
||||||
}
|
}
|
||||||
|
@ -160,7 +158,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
* @see #score()
|
* @see #score()
|
||||||
*/
|
*/
|
||||||
protected float getSpanScore() throws IOException {
|
protected float getSpanScore() throws IOException {
|
||||||
return super.score();
|
return super.scoreCurrentDoc();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -170,7 +168,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
* {@link PayloadFunction#docScore(int, String, int, float)}
|
* {@link PayloadFunction#docScore(int, String, int, float)}
|
||||||
*/
|
*/
|
||||||
protected float getPayloadScore() {
|
protected float getPayloadScore() {
|
||||||
return function.docScore(doc, term.field(), payloadsSeen, payloadScore);
|
return function.docScore(docID(), term.field(), payloadsSeen, payloadScore);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,11 +29,11 @@ import java.util.Objects;
|
||||||
* Common super class for un/ordered Spans
|
* Common super class for un/ordered Spans
|
||||||
*/
|
*/
|
||||||
abstract class NearSpans extends Spans {
|
abstract class NearSpans extends Spans {
|
||||||
SpanNearQuery query;
|
final SpanNearQuery query;
|
||||||
int allowedSlop;
|
final int allowedSlop;
|
||||||
|
|
||||||
List<Spans> subSpans; // in query order
|
final Spans[] subSpans; // in query order
|
||||||
DocIdSetIterator conjunction; // use to move to next doc with all clauses
|
final DocIdSetIterator conjunction; // use to move to next doc with all clauses
|
||||||
boolean atFirstInCurrentDoc;
|
boolean atFirstInCurrentDoc;
|
||||||
boolean oneExhaustedInCurrentDoc; // no more results possbile in current doc
|
boolean oneExhaustedInCurrentDoc; // no more results possbile in current doc
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@ abstract class NearSpans extends Spans {
|
||||||
if (subSpans.size() < 2) {
|
if (subSpans.size() < 2) {
|
||||||
throw new IllegalArgumentException("Less than 2 subSpans: " + query);
|
throw new IllegalArgumentException("Less than 2 subSpans: " + query);
|
||||||
}
|
}
|
||||||
this.subSpans = Objects.requireNonNull(subSpans); // in query order
|
this.subSpans = subSpans.toArray(new Spans[subSpans.size()]); // in query order
|
||||||
this.conjunction = ConjunctionDISI.intersect(subSpans);
|
this.conjunction = ConjunctionDISI.intersect(subSpans);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,13 +91,8 @@ abstract class NearSpans extends Spans {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Spans[] subSpansArray = null; // init only when needed.
|
|
||||||
|
|
||||||
public Spans[] getSubSpans() {
|
public Spans[] getSubSpans() {
|
||||||
if (subSpansArray == null) {
|
return subSpans;
|
||||||
subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
|
|
||||||
}
|
|
||||||
return subSpansArray;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,12 +18,8 @@ package org.apache.lucene.search.spans;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
|
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
|
||||||
* where the subspans do not overlap and have a maximum slop between them,
|
* where the subspans do not overlap and have a maximum slop between them,
|
||||||
|
@ -146,11 +142,11 @@ public class NearSpansOrdered extends NearSpans {
|
||||||
* otherwise at least one is exhausted in the current doc.
|
* otherwise at least one is exhausted in the current doc.
|
||||||
*/
|
*/
|
||||||
private boolean stretchToOrder() throws IOException {
|
private boolean stretchToOrder() throws IOException {
|
||||||
Spans prevSpans = subSpans.get(0);
|
Spans prevSpans = subSpans[0];
|
||||||
assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
|
assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
|
||||||
assert prevSpans.endPosition() != NO_MORE_POSITIONS;
|
assert prevSpans.endPosition() != NO_MORE_POSITIONS;
|
||||||
for (int i = 1; i < subSpans.size(); i++) {
|
for (int i = 1; i < subSpans.length; i++) {
|
||||||
Spans spans = subSpans.get(i);
|
Spans spans = subSpans[i];
|
||||||
assert spans.startPosition() != NO_MORE_POSITIONS;
|
assert spans.startPosition() != NO_MORE_POSITIONS;
|
||||||
assert spans.endPosition() != NO_MORE_POSITIONS;
|
assert spans.endPosition() != NO_MORE_POSITIONS;
|
||||||
|
|
||||||
|
@ -169,15 +165,14 @@ public class NearSpansOrdered extends NearSpans {
|
||||||
* on all subSpans, except the last one, in reverse order.
|
* on all subSpans, except the last one, in reverse order.
|
||||||
*/
|
*/
|
||||||
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
||||||
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
|
Spans lastSubSpans = subSpans[subSpans.length - 1];
|
||||||
matchStart = lastSubSpans.startPosition();
|
matchStart = lastSubSpans.startPosition();
|
||||||
matchEnd = lastSubSpans.endPosition();
|
matchEnd = lastSubSpans.endPosition();
|
||||||
|
|
||||||
int matchSlop = 0;
|
int matchSlop = 0;
|
||||||
int lastStart = matchStart;
|
int lastStart = matchStart;
|
||||||
int lastEnd = matchEnd;
|
for (int i = subSpans.length - 2; i >= 0; i--) {
|
||||||
for (int i = subSpans.size() - 2; i >= 0; i--) {
|
Spans prevSpans = subSpans[i];
|
||||||
Spans prevSpans = subSpans.get(i);
|
|
||||||
|
|
||||||
int prevStart = prevSpans.startPosition();
|
int prevStart = prevSpans.startPosition();
|
||||||
int prevEnd = prevSpans.endPosition();
|
int prevEnd = prevSpans.endPosition();
|
||||||
|
@ -206,7 +201,6 @@ public class NearSpansOrdered extends NearSpans {
|
||||||
*/
|
*/
|
||||||
matchStart = prevStart;
|
matchStart = prevStart;
|
||||||
lastStart = prevStart;
|
lastStart = prevStart;
|
||||||
lastEnd = prevEnd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean match = matchSlop <= allowedSlop;
|
boolean match = matchSlop <= allowedSlop;
|
||||||
|
@ -224,16 +218,14 @@ public class NearSpansOrdered extends NearSpans {
|
||||||
return atFirstInCurrentDoc ? -1 : matchEnd;
|
return atFirstInCurrentDoc ? -1 : matchEnd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Throws an UnsupportedOperationException */
|
|
||||||
@Override
|
@Override
|
||||||
public Collection<byte[]> getPayload() throws IOException {
|
public Collection<byte[]> getPayload() throws IOException {
|
||||||
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Throws an UnsupportedOperationException */
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isPayloadAvailable() {
|
public boolean isPayloadAvailable() {
|
||||||
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -47,7 +47,7 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
|
||||||
* Also collect the payloads.
|
* Also collect the payloads.
|
||||||
*/
|
*/
|
||||||
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
protected boolean shrinkToAfterShortestMatch() throws IOException {
|
||||||
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
|
Spans lastSubSpans = subSpans[subSpans.length - 1];
|
||||||
matchStart = lastSubSpans.startPosition();
|
matchStart = lastSubSpans.startPosition();
|
||||||
matchEnd = lastSubSpans.endPosition();
|
matchEnd = lastSubSpans.endPosition();
|
||||||
|
|
||||||
|
@ -62,9 +62,8 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
|
||||||
|
|
||||||
int matchSlop = 0;
|
int matchSlop = 0;
|
||||||
int lastStart = matchStart;
|
int lastStart = matchStart;
|
||||||
int lastEnd = matchEnd;
|
for (int i = subSpans.length - 2; i >= 0; i--) {
|
||||||
for (int i = subSpans.size() - 2; i >= 0; i--) {
|
Spans prevSpans = subSpans[i];
|
||||||
Spans prevSpans = subSpans.get(i);
|
|
||||||
|
|
||||||
if (prevSpans.isPayloadAvailable()) {
|
if (prevSpans.isPayloadAvailable()) {
|
||||||
Collection<byte[]> payload = prevSpans.getPayload();
|
Collection<byte[]> payload = prevSpans.getPayload();
|
||||||
|
@ -112,7 +111,6 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
|
||||||
*/
|
*/
|
||||||
matchStart = prevStart;
|
matchStart = prevStart;
|
||||||
lastStart = prevStart;
|
lastStart = prevStart;
|
||||||
lastEnd = prevEnd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean match = matchSlop <= allowedSlop;
|
boolean match = matchSlop <= allowedSlop;
|
||||||
|
|
|
@ -18,19 +18,17 @@ package org.apache.lucene.search.spans;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermContext;
|
import org.apache.lucene.index.TermContext;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
@ -132,9 +130,14 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Terms terms = context.reader().terms(field);
|
||||||
|
if (terms == null) {
|
||||||
|
return null; // field does not exist
|
||||||
|
}
|
||||||
|
|
||||||
// all NearSpans require at least two subSpans
|
// all NearSpans require at least two subSpans
|
||||||
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
|
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
|
||||||
: collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
|
: collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans)
|
||||||
: new NearSpansOrdered(this, subSpans);
|
: new NearSpansOrdered(this, subSpans);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -146,7 +146,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
||||||
startPos = in.nextStartPosition();
|
startPos = in.nextStartPosition();
|
||||||
assert startPos != NO_MORE_POSITIONS;
|
assert startPos != NO_MORE_POSITIONS;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
switch(acceptPosition(this)) {
|
switch(acceptPosition(in)) {
|
||||||
case YES:
|
case YES:
|
||||||
atFirstInCurrentDoc = true;
|
atFirstInCurrentDoc = true;
|
||||||
return in.docID();
|
return in.docID();
|
||||||
|
@ -180,7 +180,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
||||||
if (startPos == NO_MORE_POSITIONS) {
|
if (startPos == NO_MORE_POSITIONS) {
|
||||||
return NO_MORE_POSITIONS;
|
return NO_MORE_POSITIONS;
|
||||||
}
|
}
|
||||||
switch(acceptPosition(this)) {
|
switch(acceptPosition(in)) {
|
||||||
case YES:
|
case YES:
|
||||||
return startPos;
|
return startPos;
|
||||||
case NO:
|
case NO:
|
||||||
|
|
|
@ -21,48 +21,58 @@ import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.apache.lucene.search.Scorer;
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.TwoPhaseIterator;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Public for extension only.
|
* Public for extension only.
|
||||||
*/
|
*/
|
||||||
public class SpanScorer extends Scorer {
|
public class SpanScorer extends Scorer {
|
||||||
protected Spans spans;
|
/** underlying spans we are scoring from */
|
||||||
|
protected final Spans spans;
|
||||||
protected int doc;
|
/** similarity used in default score impl */
|
||||||
protected float freq;
|
|
||||||
protected int numMatches;
|
|
||||||
protected final Similarity.SimScorer docScorer;
|
protected final Similarity.SimScorer docScorer;
|
||||||
|
|
||||||
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
|
/** accumulated sloppy freq (computed in setFreqCurrentDoc) */
|
||||||
throws IOException {
|
protected float freq;
|
||||||
|
/** number of matches (computed in setFreqCurrentDoc) */
|
||||||
|
protected int numMatches;
|
||||||
|
|
||||||
|
private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
|
||||||
|
|
||||||
|
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = Objects.requireNonNull(docScorer);
|
this.docScorer = Objects.requireNonNull(docScorer);
|
||||||
this.spans = Objects.requireNonNull(spans);
|
this.spans = Objects.requireNonNull(spans);
|
||||||
this.doc = -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() throws IOException {
|
public final int nextDoc() throws IOException {
|
||||||
int prevDoc = doc;
|
return spans.nextDoc();
|
||||||
doc = spans.nextDoc();
|
|
||||||
if (doc != NO_MORE_DOCS) {
|
|
||||||
setFreqCurrentDoc();
|
|
||||||
}
|
|
||||||
return doc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int advance(int target) throws IOException {
|
public final int advance(int target) throws IOException {
|
||||||
int prevDoc = doc;
|
return spans.advance(target);
|
||||||
doc = spans.advance(target);
|
|
||||||
if (doc != NO_MORE_DOCS) {
|
|
||||||
setFreqCurrentDoc();
|
|
||||||
}
|
|
||||||
return doc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean setFreqCurrentDoc() throws IOException {
|
/**
|
||||||
|
* Ensure setFreqCurrentDoc is called, if not already called for the current doc.
|
||||||
|
*/
|
||||||
|
private final void ensureFreq() throws IOException {
|
||||||
|
int currentDoc = spans.docID();
|
||||||
|
if (lastScoredDoc != currentDoc) {
|
||||||
|
setFreqCurrentDoc();
|
||||||
|
lastScoredDoc = currentDoc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets {@link #freq} and {@link #numMatches} for the current document.
|
||||||
|
* <p>
|
||||||
|
* This will be called at most once per document.
|
||||||
|
*/
|
||||||
|
protected void setFreqCurrentDoc() throws IOException {
|
||||||
freq = 0.0f;
|
freq = 0.0f;
|
||||||
numMatches = 0;
|
numMatches = 0;
|
||||||
|
|
||||||
|
@ -90,34 +100,46 @@ public class SpanScorer extends Scorer {
|
||||||
|
|
||||||
assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
|
assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
|
||||||
assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
|
assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
/**
|
||||||
|
* Score the current doc. The default implementation scores the doc
|
||||||
|
* with the similarity using the slop-adjusted {@link #freq}.
|
||||||
|
*/
|
||||||
|
protected float scoreCurrentDoc() throws IOException {
|
||||||
|
return docScorer.score(spans.docID(), freq);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int docID() { return doc; }
|
public final int docID() { return spans.docID(); }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float score() throws IOException {
|
public final float score() throws IOException {
|
||||||
float s = docScorer.score(doc, freq);
|
ensureFreq();
|
||||||
return s;
|
return scoreCurrentDoc();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int freq() throws IOException {
|
public final int freq() throws IOException {
|
||||||
|
ensureFreq();
|
||||||
return numMatches;
|
return numMatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the intermediate "sloppy freq" adjusted for edit distance
|
/** Returns the intermediate "sloppy freq" adjusted for edit distance
|
||||||
* @lucene.internal */
|
* @lucene.internal */
|
||||||
// only public so .payloads can see it.
|
// only public so .payloads can see it.
|
||||||
public float sloppyFreq() throws IOException {
|
public final float sloppyFreq() throws IOException {
|
||||||
|
ensureFreq();
|
||||||
return freq;
|
return freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long cost() {
|
public final long cost() {
|
||||||
return spans.cost();
|
return spans.cost();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
|
return spans.asTwoPhaseIterator();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -199,8 +199,8 @@ public class TestIndexWriterForceMerge extends LuceneTestCase {
|
||||||
assertTrue("forceMerge used too much temporary space: starting usage was "
|
assertTrue("forceMerge used too much temporary space: starting usage was "
|
||||||
+ startDiskUsage + " bytes; final usage was " + finalDiskUsage
|
+ startDiskUsage + " bytes; final usage was " + finalDiskUsage
|
||||||
+ " bytes; max temp usage was " + maxDiskUsage
|
+ " bytes; max temp usage was " + maxDiskUsage
|
||||||
+ " but should have been " + (3 * maxStartFinalDiskUsage)
|
+ " but should have been at most " + (4 * maxStartFinalDiskUsage)
|
||||||
+ " (= 3X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 3 * maxStartFinalDiskUsage);
|
+ " (= 4X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 4 * maxStartFinalDiskUsage);
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -162,7 +162,12 @@ final class JustCompileSearchSpans {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean setFreqCurrentDoc() {
|
protected void setFreqCurrentDoc() {
|
||||||
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected float scoreCurrentDoc() throws IOException {
|
||||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.SearchEquivalenceTestBase;
|
import org.apache.lucene.search.SearchEquivalenceTestBase;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
|
||||||
|
@ -106,4 +107,122 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
|
||||||
SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false);
|
SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false);
|
||||||
assertSubsetOf(q1, q2);
|
assertSubsetOf(q1, q2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** SpanNearQuery([A B], N, false) ⊆ SpanNearQuery([A B], N+1, false) */
|
||||||
|
public void testSpanNearIncreasingSloppiness() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
Term t2 = randomTerm();
|
||||||
|
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
|
||||||
|
SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
|
||||||
|
assertSubsetOf(q1, q2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanNearQuery([A B C], N, false) ⊆ SpanNearQuery([A B C], N+1, false) */
|
||||||
|
public void testSpanNearIncreasingSloppiness3() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
Term t2 = randomTerm();
|
||||||
|
Term t3 = randomTerm();
|
||||||
|
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
|
||||||
|
SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
|
||||||
|
assertSubsetOf(q1, q2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanNearQuery([A B], N, true) ⊆ SpanNearQuery([A B], N+1, true) */
|
||||||
|
public void testSpanNearIncreasingOrderedSloppiness() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
Term t2 = randomTerm();
|
||||||
|
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
|
||||||
|
SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
|
||||||
|
assertSubsetOf(q1, q2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanNearQuery([A B C], N, true) ⊆ SpanNearQuery([A B C], N+1, true) */
|
||||||
|
public void testSpanNearIncreasingOrderedSloppiness3() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
Term t2 = randomTerm();
|
||||||
|
Term t3 = randomTerm();
|
||||||
|
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
|
||||||
|
SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
|
||||||
|
assertSubsetOf(q1, q2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanFirstQuery(A, N) ⊆ TermQuery(A) */
|
||||||
|
public void testSpanFirstTerm() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
|
||||||
|
Query q2 = new TermQuery(t1);
|
||||||
|
assertSubsetOf(q1, q2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanFirstQuery(A, N) ⊆ SpanFirstQuery(A, N+1) */
|
||||||
|
public void testSpanFirstTermIncreasing() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
|
||||||
|
Query q2 = new SpanFirstQuery(new SpanTermQuery(t1), i+1);
|
||||||
|
assertSubsetOf(q1, q2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanFirstQuery(A, ∞) = TermQuery(A) */
|
||||||
|
public void testSpanFirstTermEverything() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), Integer.MAX_VALUE);
|
||||||
|
Query q2 = new TermQuery(t1);
|
||||||
|
assertSameSet(q1, q2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanFirstQuery([A B], N) ⊆ SpanNearQuery([A B]) */
|
||||||
|
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
|
||||||
|
public void testSpanFirstNear() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
Term t2 = randomTerm();
|
||||||
|
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||||
|
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
Query q1 = new SpanFirstQuery(nearQuery, i);
|
||||||
|
Query q2 = nearQuery;
|
||||||
|
assertSubsetOf(q1, q2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanFirstQuery([A B], N) ⊆ SpanFirstQuery([A B], N+1) */
|
||||||
|
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
|
||||||
|
public void testSpanFirstNearIncreasing() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
Term t2 = randomTerm();
|
||||||
|
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||||
|
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
Query q1 = new SpanFirstQuery(nearQuery, i);
|
||||||
|
Query q2 = new SpanFirstQuery(nearQuery, i+1);
|
||||||
|
assertSubsetOf(q1, q2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** SpanFirstQuery([A B], ∞) = SpanNearQuery([A B]) */
|
||||||
|
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
|
||||||
|
public void testSpanFirstNearEverything() throws Exception {
|
||||||
|
Term t1 = randomTerm();
|
||||||
|
Term t2 = randomTerm();
|
||||||
|
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||||
|
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
|
||||||
|
Query q1 = new SpanFirstQuery(nearQuery, Integer.MAX_VALUE);
|
||||||
|
Query q2 = nearQuery;
|
||||||
|
assertSameSet(q1, q2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -306,6 +306,9 @@ public class WeightedSpanTermExtractor {
|
||||||
}
|
}
|
||||||
Bits acceptDocs = context.reader().getLiveDocs();
|
Bits acceptDocs = context.reader().getLiveDocs();
|
||||||
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
|
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
|
||||||
|
if (spans == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// collect span positions
|
// collect span positions
|
||||||
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
package org.apache.lucene.search.highlight;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class MissesTest extends LuceneTestCase {
|
||||||
|
public void testTermQuery() throws IOException, InvalidTokenOffsetsException {
|
||||||
|
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
|
||||||
|
final Query query = new TermQuery(new Term("test", "foo"));
|
||||||
|
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
|
||||||
|
assertEquals("this is a <B>foo</B> bar example",
|
||||||
|
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
|
||||||
|
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBooleanQuery() throws IOException, InvalidTokenOffsetsException {
|
||||||
|
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
|
||||||
|
final BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term("test", "foo")), Occur.MUST);
|
||||||
|
query.add(new TermQuery(new Term("test", "bar")), Occur.MUST);
|
||||||
|
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
|
||||||
|
assertEquals("this is a <B>foo</B> <B>bar</B> example",
|
||||||
|
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
|
||||||
|
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPhraseQuery() throws IOException, InvalidTokenOffsetsException {
|
||||||
|
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
|
||||||
|
final PhraseQuery query = new PhraseQuery();
|
||||||
|
query.add(new Term("test", "foo"));
|
||||||
|
query.add(new Term("test", "bar"));
|
||||||
|
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
|
||||||
|
assertEquals("this is a <B>foo</B> <B>bar</B> example",
|
||||||
|
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
|
||||||
|
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanNearQuery() throws IOException, InvalidTokenOffsetsException {
|
||||||
|
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
|
||||||
|
final Query query = new SpanNearQuery(new SpanQuery[] {
|
||||||
|
new SpanTermQuery(new Term("test", "foo")),
|
||||||
|
new SpanTermQuery(new Term("test", "bar"))}, 0, true);
|
||||||
|
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
|
||||||
|
assertEquals("this is a <B>foo</B> <B>bar</B> example",
|
||||||
|
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
|
||||||
|
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -116,9 +116,9 @@ org.apache.hadoop.version = 2.6.0
|
||||||
|
|
||||||
# The httpcore version is often different from the httpclient and httpmime versions,
|
# The httpcore version is often different from the httpclient and httpmime versions,
|
||||||
# so the httpcore version value should not share the same symbolic name with them.
|
# so the httpcore version value should not share the same symbolic name with them.
|
||||||
/org.apache.httpcomponents/httpclient = 4.3.1
|
/org.apache.httpcomponents/httpclient = 4.4.1
|
||||||
/org.apache.httpcomponents/httpcore = 4.3
|
/org.apache.httpcomponents/httpcore = 4.4.1
|
||||||
/org.apache.httpcomponents/httpmime = 4.3.1
|
/org.apache.httpcomponents/httpmime = 4.4.1
|
||||||
|
|
||||||
/org.apache.ivy/ivy = 2.3.0
|
/org.apache.ivy/ivy = 2.3.0
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,97 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.TwoPhaseIterator;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.util.LongBitSet;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
abstract class BaseGlobalOrdinalScorer extends Scorer {
|
||||||
|
|
||||||
|
final LongBitSet foundOrds;
|
||||||
|
final SortedDocValues values;
|
||||||
|
final Scorer approximationScorer;
|
||||||
|
|
||||||
|
float score;
|
||||||
|
|
||||||
|
public BaseGlobalOrdinalScorer(Weight weight, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
|
||||||
|
super(weight);
|
||||||
|
this.foundOrds = foundOrds;
|
||||||
|
this.values = values;
|
||||||
|
this.approximationScorer = approximationScorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float score() throws IOException {
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return approximationScorer.docID();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
return advance(approximationScorer.docID() + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
|
final DocIdSetIterator approximation = new DocIdSetIterator() {
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return approximationScorer.docID();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
return approximationScorer.nextDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
return approximationScorer.advance(target);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return approximationScorer.cost();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return createTwoPhaseIterator(approximation);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return approximationScorer.cost();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int freq() throws IOException {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation);
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,114 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.search.Collector;
|
||||||
|
import org.apache.lucene.search.LeafCollector;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.util.LongBitSet;
|
||||||
|
import org.apache.lucene.util.LongValues;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A collector that collects all ordinals from a specified field matching the query.
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
final class GlobalOrdinalsCollector implements Collector {
|
||||||
|
|
||||||
|
final String field;
|
||||||
|
final LongBitSet collectedOrds;
|
||||||
|
final MultiDocValues.OrdinalMap ordinalMap;
|
||||||
|
|
||||||
|
GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
|
||||||
|
this.field = field;
|
||||||
|
this.ordinalMap = ordinalMap;
|
||||||
|
this.collectedOrds = new LongBitSet(valueCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
public LongBitSet getCollectorOrdinals() {
|
||||||
|
return collectedOrds;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||||
|
SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
|
||||||
|
if (ordinalMap != null) {
|
||||||
|
LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
|
||||||
|
return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
|
||||||
|
} else {
|
||||||
|
return new SegmentOrdinalCollector(docTermOrds);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final class OrdinalMapCollector implements LeafCollector {
|
||||||
|
|
||||||
|
private final SortedDocValues docTermOrds;
|
||||||
|
private final LongValues segmentOrdToGlobalOrdLookup;
|
||||||
|
|
||||||
|
OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
|
||||||
|
this.docTermOrds = docTermOrds;
|
||||||
|
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
final long segmentOrd = docTermOrds.getOrd(doc);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
||||||
|
collectedOrds.set(globalOrd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final class SegmentOrdinalCollector implements LeafCollector {
|
||||||
|
|
||||||
|
private final SortedDocValues docTermOrds;
|
||||||
|
|
||||||
|
SegmentOrdinalCollector(SortedDocValues docTermOrds) {
|
||||||
|
this.docTermOrds = docTermOrds;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
final long segmentOrd = docTermOrds.getOrd(doc);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
collectedOrds.set(segmentOrd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,245 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.ComplexExplanation;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.TwoPhaseIterator;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LongBitSet;
|
||||||
|
import org.apache.lucene.util.LongValues;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
final class GlobalOrdinalsQuery extends Query {
|
||||||
|
|
||||||
|
// All the ords of matching docs found with OrdinalsCollector.
|
||||||
|
private final LongBitSet foundOrds;
|
||||||
|
private final String joinField;
|
||||||
|
private final MultiDocValues.OrdinalMap globalOrds;
|
||||||
|
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
|
||||||
|
private final Query toQuery;
|
||||||
|
|
||||||
|
// just for hashcode and equals:
|
||||||
|
private final Query fromQuery;
|
||||||
|
private final IndexReader indexReader;
|
||||||
|
|
||||||
|
GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
|
||||||
|
this.foundOrds = foundOrds;
|
||||||
|
this.joinField = joinField;
|
||||||
|
this.globalOrds = globalOrds;
|
||||||
|
this.toQuery = toQuery;
|
||||||
|
this.fromQuery = fromQuery;
|
||||||
|
this.indexReader = indexReader;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||||
|
return new W(this, toQuery.createWeight(searcher, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void extractTerms(Set<Term> terms) {
|
||||||
|
fromQuery.extractTerms(terms);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
if (!super.equals(o)) return false;
|
||||||
|
|
||||||
|
GlobalOrdinalsQuery that = (GlobalOrdinalsQuery) o;
|
||||||
|
|
||||||
|
if (!fromQuery.equals(that.fromQuery)) return false;
|
||||||
|
if (!joinField.equals(that.joinField)) return false;
|
||||||
|
if (!toQuery.equals(that.toQuery)) return false;
|
||||||
|
if (!indexReader.equals(that.indexReader)) return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int result = super.hashCode();
|
||||||
|
result = 31 * result + joinField.hashCode();
|
||||||
|
result = 31 * result + toQuery.hashCode();
|
||||||
|
result = 31 * result + fromQuery.hashCode();
|
||||||
|
result = 31 * result + indexReader.hashCode();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "GlobalOrdinalsQuery{" +
|
||||||
|
"joinField=" + joinField +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
final class W extends Weight {
|
||||||
|
|
||||||
|
private final Weight approximationWeight;
|
||||||
|
|
||||||
|
private float queryNorm;
|
||||||
|
private float queryWeight;
|
||||||
|
|
||||||
|
W(Query query, Weight approximationWeight) {
|
||||||
|
super(query);
|
||||||
|
this.approximationWeight = approximationWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
|
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
|
||||||
|
if (values != null) {
|
||||||
|
int segmentOrd = values.getOrd(doc);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
BytesRef joinValue = values.lookupOrd(segmentOrd);
|
||||||
|
return new ComplexExplanation(true, queryNorm, "Score based on join value " + joinValue.utf8ToString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ComplexExplanation(false, 0.0f, "Not a match");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getValueForNormalization() throws IOException {
|
||||||
|
queryWeight = getBoost();
|
||||||
|
return queryWeight * queryWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void normalize(float norm, float topLevelBoost) {
|
||||||
|
this.queryNorm = norm * topLevelBoost;
|
||||||
|
queryWeight *= this.queryNorm;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||||
|
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
|
||||||
|
if (values == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
|
||||||
|
if (approximationScorer == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (globalOrds != null) {
|
||||||
|
return new OrdinalMapScorer(this, queryNorm, foundOrds, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
|
||||||
|
} {
|
||||||
|
return new SegmentOrdinalScorer(this, queryNorm, foundOrds, values, approximationScorer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
|
||||||
|
|
||||||
|
final LongValues segmentOrdToGlobalOrdLookup;
|
||||||
|
|
||||||
|
public OrdinalMapScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
|
||||||
|
super(weight, foundOrds, values, approximationScorer);
|
||||||
|
this.score = score;
|
||||||
|
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
|
||||||
|
final long segmentOrd = values.getOrd(docID);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
||||||
|
if (foundOrds.get(globalOrd)) {
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
|
||||||
|
return new TwoPhaseIterator(approximation) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches() throws IOException {
|
||||||
|
final long segmentOrd = values.getOrd(approximationScorer.docID());
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
||||||
|
if (foundOrds.get(globalOrd)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
|
||||||
|
|
||||||
|
public SegmentOrdinalScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
|
||||||
|
super(weight, foundOrds, values, approximationScorer);
|
||||||
|
this.score = score;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
|
||||||
|
final long segmentOrd = values.getOrd(docID);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
if (foundOrds.get(segmentOrd)) {
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
|
||||||
|
return new TwoPhaseIterator(approximation) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches() throws IOException {
|
||||||
|
final long segmentOrd = values.getOrd(approximationScorer.docID());
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
if (foundOrds.get(segmentOrd)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,250 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.search.Collector;
|
||||||
|
import org.apache.lucene.search.LeafCollector;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.util.LongBitSet;
|
||||||
|
import org.apache.lucene.util.LongValues;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
abstract class GlobalOrdinalsWithScoreCollector implements Collector {
|
||||||
|
|
||||||
|
final String field;
|
||||||
|
final MultiDocValues.OrdinalMap ordinalMap;
|
||||||
|
final LongBitSet collectedOrds;
|
||||||
|
protected final Scores scores;
|
||||||
|
|
||||||
|
GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
|
||||||
|
if (valueCount > Integer.MAX_VALUE) {
|
||||||
|
// We simply don't support more than
|
||||||
|
throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
|
||||||
|
}
|
||||||
|
this.field = field;
|
||||||
|
this.ordinalMap = ordinalMap;
|
||||||
|
this.collectedOrds = new LongBitSet(valueCount);
|
||||||
|
this.scores = new Scores(valueCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
public LongBitSet getCollectorOrdinals() {
|
||||||
|
return collectedOrds;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float score(int globalOrdinal) {
|
||||||
|
return scores.getScore(globalOrdinal);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract void doScore(int globalOrd, float existingScore, float newScore);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||||
|
SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
|
||||||
|
if (ordinalMap != null) {
|
||||||
|
LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
|
||||||
|
return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
|
||||||
|
} else {
|
||||||
|
return new SegmentOrdinalCollector(docTermOrds);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
final class OrdinalMapCollector implements LeafCollector {
|
||||||
|
|
||||||
|
private final SortedDocValues docTermOrds;
|
||||||
|
private final LongValues segmentOrdToGlobalOrdLookup;
|
||||||
|
private Scorer scorer;
|
||||||
|
|
||||||
|
OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
|
||||||
|
this.docTermOrds = docTermOrds;
|
||||||
|
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
final long segmentOrd = docTermOrds.getOrd(doc);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
||||||
|
collectedOrds.set(globalOrd);
|
||||||
|
float existingScore = scores.getScore(globalOrd);
|
||||||
|
float newScore = scorer.score();
|
||||||
|
doScore(globalOrd, existingScore, newScore);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final class SegmentOrdinalCollector implements LeafCollector {
|
||||||
|
|
||||||
|
private final SortedDocValues docTermOrds;
|
||||||
|
private Scorer scorer;
|
||||||
|
|
||||||
|
SegmentOrdinalCollector(SortedDocValues docTermOrds) {
|
||||||
|
this.docTermOrds = docTermOrds;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
final int segmentOrd = docTermOrds.getOrd(doc);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
collectedOrds.set(segmentOrd);
|
||||||
|
float existingScore = scores.getScore(segmentOrd);
|
||||||
|
float newScore = scorer.score();
|
||||||
|
doScore(segmentOrd, existingScore, newScore);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Max extends GlobalOrdinalsWithScoreCollector {
|
||||||
|
|
||||||
|
public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
|
||||||
|
super(field, ordinalMap, valueCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doScore(int globalOrd, float existingScore, float newScore) {
|
||||||
|
scores.setScore(globalOrd, Math.max(existingScore, newScore));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Sum extends GlobalOrdinalsWithScoreCollector {
|
||||||
|
|
||||||
|
public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
|
||||||
|
super(field, ordinalMap, valueCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doScore(int globalOrd, float existingScore, float newScore) {
|
||||||
|
scores.setScore(globalOrd, existingScore + newScore);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Avg extends GlobalOrdinalsWithScoreCollector {
|
||||||
|
|
||||||
|
private final Occurrences occurrences;
|
||||||
|
|
||||||
|
public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
|
||||||
|
super(field, ordinalMap, valueCount);
|
||||||
|
this.occurrences = new Occurrences(valueCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doScore(int globalOrd, float existingScore, float newScore) {
|
||||||
|
occurrences.increment(globalOrd);
|
||||||
|
scores.setScore(globalOrd, existingScore + newScore);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float score(int globalOrdinal) {
|
||||||
|
return scores.getScore(globalOrdinal) / occurrences.getOccurence(globalOrdinal);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Because the global ordinal is directly used as a key to a score we should be somewhat smart about allocation
|
||||||
|
// the scores array. Most of the times not all docs match so splitting the scores array up in blocks can prevent creation of huge arrays.
|
||||||
|
// Also working with smaller arrays is supposed to be more gc friendly
|
||||||
|
//
|
||||||
|
// At first a hash map implementation would make sense, but in the case that more than half of docs match this becomes more expensive
|
||||||
|
// then just using an array.
|
||||||
|
|
||||||
|
// Maybe this should become a method parameter?
|
||||||
|
static final int arraySize = 4096;
|
||||||
|
|
||||||
|
static final class Scores {
|
||||||
|
|
||||||
|
final float[][] blocks;
|
||||||
|
|
||||||
|
private Scores(long valueCount) {
|
||||||
|
long blockSize = valueCount + arraySize - 1;
|
||||||
|
blocks = new float[(int) ((blockSize) / arraySize)][];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setScore(int globalOrdinal, float score) {
|
||||||
|
int block = globalOrdinal / arraySize;
|
||||||
|
int offset = globalOrdinal % arraySize;
|
||||||
|
float[] scores = blocks[block];
|
||||||
|
if (scores == null) {
|
||||||
|
blocks[block] = scores = new float[arraySize];
|
||||||
|
}
|
||||||
|
scores[offset] = score;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float getScore(int globalOrdinal) {
|
||||||
|
int block = globalOrdinal / arraySize;
|
||||||
|
int offset = globalOrdinal % arraySize;
|
||||||
|
float[] scores = blocks[block];
|
||||||
|
float score;
|
||||||
|
if (scores != null) {
|
||||||
|
score = scores[offset];
|
||||||
|
} else {
|
||||||
|
score = 0f;
|
||||||
|
}
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class Occurrences {
|
||||||
|
|
||||||
|
final int[][] blocks;
|
||||||
|
|
||||||
|
private Occurrences(long valueCount) {
|
||||||
|
long blockSize = valueCount + arraySize - 1;
|
||||||
|
blocks = new int[(int) (blockSize / arraySize)][];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void increment(int globalOrdinal) {
|
||||||
|
int block = globalOrdinal / arraySize;
|
||||||
|
int offset = globalOrdinal % arraySize;
|
||||||
|
int[] occurrences = blocks[block];
|
||||||
|
if (occurrences == null) {
|
||||||
|
blocks[block] = occurrences = new int[arraySize];
|
||||||
|
}
|
||||||
|
occurrences[offset]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getOccurence(int globalOrdinal) {
|
||||||
|
int block = globalOrdinal / arraySize;
|
||||||
|
int offset = globalOrdinal % arraySize;
|
||||||
|
int[] occurrences = blocks[block];
|
||||||
|
return occurrences[offset];
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,256 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.ComplexExplanation;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.TwoPhaseIterator;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LongValues;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
final class GlobalOrdinalsWithScoreQuery extends Query {
|
||||||
|
|
||||||
|
private final GlobalOrdinalsWithScoreCollector collector;
|
||||||
|
private final String joinField;
|
||||||
|
private final MultiDocValues.OrdinalMap globalOrds;
|
||||||
|
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
|
||||||
|
private final Query toQuery;
|
||||||
|
|
||||||
|
// just for hashcode and equals:
|
||||||
|
private final Query fromQuery;
|
||||||
|
private final IndexReader indexReader;
|
||||||
|
|
||||||
|
GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
|
||||||
|
this.collector = collector;
|
||||||
|
this.joinField = joinField;
|
||||||
|
this.globalOrds = globalOrds;
|
||||||
|
this.toQuery = toQuery;
|
||||||
|
this.fromQuery = fromQuery;
|
||||||
|
this.indexReader = indexReader;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||||
|
return new W(this, toQuery.createWeight(searcher, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void extractTerms(Set<Term> terms) {
|
||||||
|
fromQuery.extractTerms(terms);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
if (!super.equals(o)) return false;
|
||||||
|
|
||||||
|
GlobalOrdinalsWithScoreQuery that = (GlobalOrdinalsWithScoreQuery) o;
|
||||||
|
|
||||||
|
if (!fromQuery.equals(that.fromQuery)) return false;
|
||||||
|
if (!joinField.equals(that.joinField)) return false;
|
||||||
|
if (!toQuery.equals(that.toQuery)) return false;
|
||||||
|
if (!indexReader.equals(that.indexReader)) return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int result = super.hashCode();
|
||||||
|
result = 31 * result + joinField.hashCode();
|
||||||
|
result = 31 * result + toQuery.hashCode();
|
||||||
|
result = 31 * result + fromQuery.hashCode();
|
||||||
|
result = 31 * result + indexReader.hashCode();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "GlobalOrdinalsQuery{" +
|
||||||
|
"joinField=" + joinField +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
final class W extends Weight {
|
||||||
|
|
||||||
|
private final Weight approximationWeight;
|
||||||
|
|
||||||
|
private float queryNorm;
|
||||||
|
private float queryWeight;
|
||||||
|
|
||||||
|
W(Query query, Weight approximationWeight) {
|
||||||
|
super(query);
|
||||||
|
this.approximationWeight = approximationWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
|
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
|
||||||
|
if (values != null) {
|
||||||
|
int segmentOrd = values.getOrd(doc);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
final float score;
|
||||||
|
if (globalOrds != null) {
|
||||||
|
long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
|
||||||
|
score = collector.scores.getScore((int) globalOrd);
|
||||||
|
} else {
|
||||||
|
score = collector.score(segmentOrd);
|
||||||
|
}
|
||||||
|
BytesRef joinValue = values.lookupOrd(segmentOrd);
|
||||||
|
return new ComplexExplanation(true, score, "Score based on join value " + joinValue.utf8ToString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ComplexExplanation(false, 0.0f, "Not a match");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getValueForNormalization() throws IOException {
|
||||||
|
queryWeight = getBoost();
|
||||||
|
return queryWeight * queryWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void normalize(float norm, float topLevelBoost) {
|
||||||
|
this.queryNorm = norm * topLevelBoost;
|
||||||
|
queryWeight *= this.queryNorm;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||||
|
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
|
||||||
|
if (values == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
|
||||||
|
if (approximationScorer == null) {
|
||||||
|
return null;
|
||||||
|
} else if (globalOrds != null) {
|
||||||
|
return new OrdinalMapScorer(this, collector, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
|
||||||
|
} else {
|
||||||
|
return new SegmentOrdinalScorer(this, collector, values, approximationScorer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
|
||||||
|
|
||||||
|
final LongValues segmentOrdToGlobalOrdLookup;
|
||||||
|
final GlobalOrdinalsWithScoreCollector collector;
|
||||||
|
|
||||||
|
public OrdinalMapScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
|
||||||
|
super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
|
||||||
|
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
|
||||||
|
this.collector = collector;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
|
||||||
|
final long segmentOrd = values.getOrd(docID);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
||||||
|
if (foundOrds.get(globalOrd)) {
|
||||||
|
score = collector.score((int) globalOrd);
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
|
||||||
|
return new TwoPhaseIterator(approximation) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches() throws IOException {
|
||||||
|
final long segmentOrd = values.getOrd(approximationScorer.docID());
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
||||||
|
if (foundOrds.get(globalOrd)) {
|
||||||
|
score = collector.score((int) globalOrd);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
|
||||||
|
|
||||||
|
final GlobalOrdinalsWithScoreCollector collector;
|
||||||
|
|
||||||
|
public SegmentOrdinalScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer) {
|
||||||
|
super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
|
||||||
|
this.collector = collector;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
|
||||||
|
final int segmentOrd = values.getOrd(docID);
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
if (foundOrds.get(segmentOrd)) {
|
||||||
|
score = collector.score(segmentOrd);
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
|
||||||
|
return new TwoPhaseIterator(approximation) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches() throws IOException {
|
||||||
|
final int segmentOrd = values.getOrd(approximationScorer.docID());
|
||||||
|
if (segmentOrd != -1) {
|
||||||
|
if (foundOrds.get(segmentOrd)) {
|
||||||
|
score = collector.score(segmentOrd);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,7 +17,12 @@ package org.apache.lucene.search.join;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -90,4 +95,78 @@ public final class JoinUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A query time join using global ordinals over a dedicated join field.
|
||||||
|
*
|
||||||
|
* This join has certain restrictions and requirements:
|
||||||
|
* 1) A document can only refer to one other document. (but can be referred by one or more documents)
|
||||||
|
* 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
|
||||||
|
* that identifies the "from" and "to" side and then the fromQuery and toQuery must take the this into account.
|
||||||
|
* 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
|
||||||
|
* should store the join values as UTF-8 strings.
|
||||||
|
* 4) An ordinal map must be provided that is created on top of the join field.
|
||||||
|
*
|
||||||
|
* @param joinField The {@link org.apache.lucene.index.SortedDocValues} field containing the join values
|
||||||
|
* @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents.
|
||||||
|
* @param toQuery The query identifying all documents on the "to" side.
|
||||||
|
* @param searcher The index searcher used to execute the from query
|
||||||
|
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
|
||||||
|
* @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
|
||||||
|
* needs to be provided.
|
||||||
|
* @return a {@link Query} instance that can be used to join documents based on the join field
|
||||||
|
* @throws IOException If I/O related errors occur
|
||||||
|
*/
|
||||||
|
public static Query createJoinQuery(String joinField,
|
||||||
|
Query fromQuery,
|
||||||
|
Query toQuery,
|
||||||
|
IndexSearcher searcher,
|
||||||
|
ScoreMode scoreMode,
|
||||||
|
MultiDocValues.OrdinalMap ordinalMap) throws IOException {
|
||||||
|
IndexReader indexReader = searcher.getIndexReader();
|
||||||
|
int numSegments = indexReader.leaves().size();
|
||||||
|
final long valueCount;
|
||||||
|
if (numSegments == 0) {
|
||||||
|
return new MatchNoDocsQuery();
|
||||||
|
} else if (numSegments == 1) {
|
||||||
|
// No need to use the ordinal map, because there is just one segment.
|
||||||
|
ordinalMap = null;
|
||||||
|
LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader();
|
||||||
|
SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField);
|
||||||
|
if (joinSortedDocValues != null) {
|
||||||
|
valueCount = joinSortedDocValues.getValueCount();
|
||||||
|
} else {
|
||||||
|
return new MatchNoDocsQuery();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (ordinalMap == null) {
|
||||||
|
throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
|
||||||
|
}
|
||||||
|
valueCount = ordinalMap.getValueCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
Query rewrittenFromQuery = searcher.rewrite(fromQuery);
|
||||||
|
if (scoreMode == ScoreMode.None) {
|
||||||
|
GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
|
||||||
|
searcher.search(fromQuery, globalOrdinalsCollector);
|
||||||
|
return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
|
||||||
|
}
|
||||||
|
|
||||||
|
GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector;
|
||||||
|
switch (scoreMode) {
|
||||||
|
case Total:
|
||||||
|
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount);
|
||||||
|
break;
|
||||||
|
case Max:
|
||||||
|
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount);
|
||||||
|
break;
|
||||||
|
case Avg:
|
||||||
|
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
|
||||||
|
}
|
||||||
|
searcher.search(fromQuery, globalOrdinalsWithScoreCollector);
|
||||||
|
return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,19 +17,6 @@ package org.apache.lucene.search.join;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.SortedSet;
|
|
||||||
import java.util.TreeSet;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -38,27 +25,29 @@ import org.apache.lucene.document.SortedDocValuesField;
|
||||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
import org.apache.lucene.index.MultiFields;
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.NoMergePolicy;
|
||||||
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.Collector;
|
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
import org.apache.lucene.search.FilterLeafCollector;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.LeafCollector;
|
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.MultiCollector;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
import org.apache.lucene.search.Scorer;
|
import org.apache.lucene.search.Scorer;
|
||||||
|
@ -74,8 +63,22 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
public class TestJoinUtil extends LuceneTestCase {
|
public class TestJoinUtil extends LuceneTestCase {
|
||||||
|
|
||||||
public void testSimple() throws Exception {
|
public void testSimple() throws Exception {
|
||||||
|
@ -169,6 +172,180 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSimpleOrdinalsJoin() throws Exception {
|
||||||
|
final String idField = "id";
|
||||||
|
final String productIdField = "productId";
|
||||||
|
// A field indicating to what type a document belongs, which is then used to distinques between documents during joining.
|
||||||
|
final String typeField = "type";
|
||||||
|
// A single sorted doc values field that holds the join values for all document types.
|
||||||
|
// Typically during indexing a schema will automatically create this field with the values
|
||||||
|
final String joinField = idField + productIdField;
|
||||||
|
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(
|
||||||
|
random(),
|
||||||
|
dir,
|
||||||
|
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
|
||||||
|
|
||||||
|
// 0
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new TextField(idField, "1", Field.Store.NO));
|
||||||
|
doc.add(new TextField(typeField, "product", Field.Store.NO));
|
||||||
|
doc.add(new TextField("description", "random text", Field.Store.NO));
|
||||||
|
doc.add(new TextField("name", "name1", Field.Store.NO));
|
||||||
|
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
// 1
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new TextField(productIdField, "1", Field.Store.NO));
|
||||||
|
doc.add(new TextField(typeField, "price", Field.Store.NO));
|
||||||
|
doc.add(new TextField("price", "10.0", Field.Store.NO));
|
||||||
|
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
// 2
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new TextField(productIdField, "1", Field.Store.NO));
|
||||||
|
doc.add(new TextField(typeField, "price", Field.Store.NO));
|
||||||
|
doc.add(new TextField("price", "20.0", Field.Store.NO));
|
||||||
|
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
// 3
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new TextField(idField, "2", Field.Store.NO));
|
||||||
|
doc.add(new TextField(typeField, "product", Field.Store.NO));
|
||||||
|
doc.add(new TextField("description", "more random text", Field.Store.NO));
|
||||||
|
doc.add(new TextField("name", "name2", Field.Store.NO));
|
||||||
|
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
|
||||||
|
w.addDocument(doc);
|
||||||
|
w.commit();
|
||||||
|
|
||||||
|
// 4
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new TextField(productIdField, "2", Field.Store.NO));
|
||||||
|
doc.add(new TextField(typeField, "price", Field.Store.NO));
|
||||||
|
doc.add(new TextField("price", "10.0", Field.Store.NO));
|
||||||
|
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
// 5
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new TextField(productIdField, "2", Field.Store.NO));
|
||||||
|
doc.add(new TextField(typeField, "price", Field.Store.NO));
|
||||||
|
doc.add(new TextField("price", "20.0", Field.Store.NO));
|
||||||
|
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
IndexReader r = indexSearcher.getIndexReader();
|
||||||
|
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
|
||||||
|
for (int i = 0; i < values.length; i++) {
|
||||||
|
LeafReader leafReader = r.leaves().get(i).reader();
|
||||||
|
values[i] = DocValues.getSorted(leafReader, joinField);
|
||||||
|
}
|
||||||
|
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
|
||||||
|
r.getCoreCacheKey(), values, PackedInts.DEFAULT
|
||||||
|
);
|
||||||
|
|
||||||
|
Query toQuery = new TermQuery(new Term(typeField, "price"));
|
||||||
|
Query fromQuery = new TermQuery(new Term("name", "name2"));
|
||||||
|
// Search for product and return prices
|
||||||
|
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
|
||||||
|
TopDocs result = indexSearcher.search(joinQuery, 10);
|
||||||
|
assertEquals(2, result.totalHits);
|
||||||
|
assertEquals(4, result.scoreDocs[0].doc);
|
||||||
|
assertEquals(5, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
|
fromQuery = new TermQuery(new Term("name", "name1"));
|
||||||
|
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
|
||||||
|
result = indexSearcher.search(joinQuery, 10);
|
||||||
|
assertEquals(2, result.totalHits);
|
||||||
|
assertEquals(1, result.scoreDocs[0].doc);
|
||||||
|
assertEquals(2, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
|
// Search for prices and return products
|
||||||
|
fromQuery = new TermQuery(new Term("price", "20.0"));
|
||||||
|
toQuery = new TermQuery(new Term(typeField, "product"));
|
||||||
|
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
|
||||||
|
result = indexSearcher.search(joinQuery, 10);
|
||||||
|
assertEquals(2, result.totalHits);
|
||||||
|
assertEquals(0, result.scoreDocs[0].doc);
|
||||||
|
assertEquals(3, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
|
indexSearcher.getIndexReader().close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRandomOrdinalsJoin() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(
|
||||||
|
random(),
|
||||||
|
dir,
|
||||||
|
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
|
||||||
|
);
|
||||||
|
IndexIterationContext context = createContext(100, w, false, true);
|
||||||
|
|
||||||
|
w.forceMerge(1);
|
||||||
|
|
||||||
|
w.close();
|
||||||
|
IndexReader topLevelReader = DirectoryReader.open(dir);
|
||||||
|
|
||||||
|
SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
|
||||||
|
for (LeafReaderContext leadContext : topLevelReader.leaves()) {
|
||||||
|
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
|
||||||
|
}
|
||||||
|
context.ordinalMap = MultiDocValues.OrdinalMap.build(
|
||||||
|
topLevelReader.getCoreCacheKey(), values, PackedInts.DEFAULT
|
||||||
|
);
|
||||||
|
IndexSearcher indexSearcher = newSearcher(topLevelReader);
|
||||||
|
|
||||||
|
int r = random().nextInt(context.randomUniqueValues.length);
|
||||||
|
boolean from = context.randomFrom[r];
|
||||||
|
String randomValue = context.randomUniqueValues[r];
|
||||||
|
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
|
||||||
|
|
||||||
|
final Query actualQuery = new TermQuery(new Term("value", randomValue));
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("actualQuery=" + actualQuery);
|
||||||
|
}
|
||||||
|
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("scoreMode=" + scoreMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
final Query joinQuery;
|
||||||
|
if (from) {
|
||||||
|
BooleanQuery fromQuery = new BooleanQuery();
|
||||||
|
fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
|
||||||
|
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
|
||||||
|
Query toQuery = new TermQuery(new Term("type", "to"));
|
||||||
|
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
|
||||||
|
} else {
|
||||||
|
BooleanQuery fromQuery = new BooleanQuery();
|
||||||
|
fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
|
||||||
|
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
|
||||||
|
Query toQuery = new TermQuery(new Term("type", "from"));
|
||||||
|
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
|
||||||
|
}
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("joinQuery=" + joinQuery);
|
||||||
|
}
|
||||||
|
|
||||||
|
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
|
||||||
|
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
|
||||||
|
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
|
||||||
|
assertBitSet(expectedResult, actualResult, indexSearcher);
|
||||||
|
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
|
||||||
|
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
|
||||||
|
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
|
||||||
|
topLevelReader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
// TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
|
// TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
|
||||||
public void testOverflowTermsWithScoreCollector() throws Exception {
|
public void testOverflowTermsWithScoreCollector() throws Exception {
|
||||||
test300spartans(true, ScoreMode.Avg);
|
test300spartans(true, ScoreMode.Avg);
|
||||||
|
@ -448,7 +625,7 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
dir,
|
dir,
|
||||||
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
|
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
|
||||||
);
|
);
|
||||||
IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument);
|
IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, false);
|
||||||
|
|
||||||
IndexReader topLevelReader = w.getReader();
|
IndexReader topLevelReader = w.getReader();
|
||||||
w.close();
|
w.close();
|
||||||
|
@ -485,28 +662,20 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
// Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
|
// Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
|
||||||
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
|
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
|
||||||
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
|
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
|
||||||
indexSearcher.search(joinQuery, new Collector() {
|
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
|
||||||
|
|
||||||
@Override
|
|
||||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
|
||||||
final int docBase = context.docBase;
|
|
||||||
final LeafCollector in = topScoreDocCollector.getLeafCollector(context);
|
|
||||||
return new FilterLeafCollector(in) {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void collect(int doc) throws IOException {
|
|
||||||
super.collect(doc);
|
|
||||||
actualResult.set(doc + docBase);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean needsScores() {
|
|
||||||
return topScoreDocCollector.needsScores();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
// Asserting bit set...
|
// Asserting bit set...
|
||||||
|
assertBitSet(expectedResult, actualResult, indexSearcher);
|
||||||
|
// Asserting TopDocs...
|
||||||
|
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
|
||||||
|
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
|
||||||
|
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
|
||||||
|
}
|
||||||
|
topLevelReader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertBitSet(BitSet expectedResult, BitSet actualResult, IndexSearcher indexSearcher) throws IOException {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("expected cardinality:" + expectedResult.cardinality());
|
System.out.println("expected cardinality:" + expectedResult.cardinality());
|
||||||
DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());
|
DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());
|
||||||
|
@ -520,14 +689,13 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assertEquals(expectedResult, actualResult);
|
assertEquals(expectedResult, actualResult);
|
||||||
|
}
|
||||||
|
|
||||||
// Asserting TopDocs...
|
private void assertTopDocs(TopDocs expectedTopDocs, TopDocs actualTopDocs, ScoreMode scoreMode, IndexSearcher indexSearcher, Query joinQuery) throws IOException {
|
||||||
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
|
|
||||||
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
|
|
||||||
assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
|
assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
|
||||||
assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
|
assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
|
||||||
if (scoreMode == ScoreMode.None) {
|
if (scoreMode == ScoreMode.None) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
|
assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
|
||||||
|
@ -542,16 +710,16 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
|
assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
topLevelReader.close();
|
|
||||||
dir.close();
|
private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument, boolean ordinalJoin) throws IOException {
|
||||||
}
|
return createContext(nDocs, writer, writer, multipleValuesPerDocument, ordinalJoin);
|
||||||
}
|
}
|
||||||
|
|
||||||
private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument) throws IOException {
|
private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
|
||||||
return createContext(nDocs, writer, writer, multipleValuesPerDocument);
|
if (globalOrdinalJoin) {
|
||||||
|
assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
|
||||||
}
|
}
|
||||||
|
|
||||||
private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument) throws IOException {
|
|
||||||
IndexIterationContext context = new IndexIterationContext();
|
IndexIterationContext context = new IndexIterationContext();
|
||||||
int numRandomValues = nDocs / 2;
|
int numRandomValues = nDocs / 2;
|
||||||
context.randomUniqueValues = new String[numRandomValues];
|
context.randomUniqueValues = new String[numRandomValues];
|
||||||
|
@ -560,8 +728,8 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
for (int i = 0; i < numRandomValues; i++) {
|
for (int i = 0; i < numRandomValues; i++) {
|
||||||
String uniqueRandomValue;
|
String uniqueRandomValue;
|
||||||
do {
|
do {
|
||||||
uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
|
// uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
|
||||||
// uniqueRandomValue = _TestUtil.randomSimpleString(random);
|
uniqueRandomValue = TestUtil.randomSimpleString(random());
|
||||||
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
|
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
|
||||||
// Generate unique values and empty strings aren't allowed.
|
// Generate unique values and empty strings aren't allowed.
|
||||||
trackSet.add(uniqueRandomValue);
|
trackSet.add(uniqueRandomValue);
|
||||||
|
@ -581,15 +749,18 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
boolean from = context.randomFrom[randomI];
|
boolean from = context.randomFrom[randomI];
|
||||||
int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
|
int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
|
||||||
docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
|
docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
|
||||||
|
if (globalOrdinalJoin) {
|
||||||
|
document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
|
||||||
|
}
|
||||||
for (int j = 0; j < numberOfLinkValues; j++) {
|
for (int j = 0; j < numberOfLinkValues; j++) {
|
||||||
String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
|
String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
|
||||||
docs[i].linkValues.add(linkValue);
|
docs[i].linkValues.add(linkValue);
|
||||||
if (from) {
|
if (from) {
|
||||||
if (!context.fromDocuments.containsKey(linkValue)) {
|
if (!context.fromDocuments.containsKey(linkValue)) {
|
||||||
context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
|
context.fromDocuments.put(linkValue, new ArrayList<>());
|
||||||
}
|
}
|
||||||
if (!context.randomValueFromDocs.containsKey(value)) {
|
if (!context.randomValueFromDocs.containsKey(value)) {
|
||||||
context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
|
context.randomValueFromDocs.put(value, new ArrayList<>());
|
||||||
}
|
}
|
||||||
|
|
||||||
context.fromDocuments.get(linkValue).add(docs[i]);
|
context.fromDocuments.get(linkValue).add(docs[i]);
|
||||||
|
@ -600,12 +771,15 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
} else {
|
} else {
|
||||||
document.add(new SortedDocValuesField("from", new BytesRef(linkValue)));
|
document.add(new SortedDocValuesField("from", new BytesRef(linkValue)));
|
||||||
}
|
}
|
||||||
|
if (globalOrdinalJoin) {
|
||||||
|
document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!context.toDocuments.containsKey(linkValue)) {
|
if (!context.toDocuments.containsKey(linkValue)) {
|
||||||
context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
|
context.toDocuments.put(linkValue, new ArrayList<>());
|
||||||
}
|
}
|
||||||
if (!context.randomValueToDocs.containsKey(value)) {
|
if (!context.randomValueToDocs.containsKey(value)) {
|
||||||
context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
|
context.randomValueToDocs.put(value, new ArrayList<>());
|
||||||
}
|
}
|
||||||
|
|
||||||
context.toDocuments.get(linkValue).add(docs[i]);
|
context.toDocuments.get(linkValue).add(docs[i]);
|
||||||
|
@ -616,6 +790,9 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
} else {
|
} else {
|
||||||
document.add(new SortedDocValuesField("to", new BytesRef(linkValue)));
|
document.add(new SortedDocValuesField("to", new BytesRef(linkValue)));
|
||||||
}
|
}
|
||||||
|
if (globalOrdinalJoin) {
|
||||||
|
document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -707,6 +884,9 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
if (joinScore == null) {
|
if (joinScore == null) {
|
||||||
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
|
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
|
||||||
}
|
}
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
|
||||||
|
}
|
||||||
joinScore.addScore(scorer.score());
|
joinScore.addScore(scorer.score());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -875,6 +1055,7 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
|
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
|
||||||
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();
|
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();
|
||||||
|
|
||||||
|
MultiDocValues.OrdinalMap ordinalMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class RandomDoc {
|
private static class RandomDoc {
|
||||||
|
@ -922,4 +1103,29 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class BitSetCollector extends SimpleCollector {
|
||||||
|
|
||||||
|
private final BitSet bitSet;
|
||||||
|
private int docBase;
|
||||||
|
|
||||||
|
private BitSetCollector(BitSet bitSet) {
|
||||||
|
this.bitSet = bitSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
bitSet.set(docBase + doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||||
|
docBase = context.docBase;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
0ec13f6423eb6d5858e229939a2bc118473ef94c
|
|
|
@ -0,0 +1 @@
|
||||||
|
016d0bc512222f1253ee6b64d389c84e22f697f0
|
|
@ -1 +0,0 @@
|
||||||
11393498b38e9695d0850cac26fde5613ae268b9
|
|
|
@ -0,0 +1 @@
|
||||||
|
f5aa318bda4c6c8d688c9d00b90681dcd82ce636
|
|
@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.StorableField;
|
|
||||||
import org.apache.lucene.index.StoredDocument;
|
import org.apache.lucene.index.StoredDocument;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queries.TermsQuery;
|
import org.apache.lucene.queries.TermsQuery;
|
||||||
|
@ -57,11 +56,9 @@ import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.LineFileDocs;
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.NumericUtils;
|
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
@ -158,10 +155,11 @@ public class SuggestFieldTest extends LuceneTestCase {
|
||||||
weights[i] = Math.abs(random().nextLong());
|
weights[i] = Math.abs(random().nextLong());
|
||||||
document.add(newSuggestField("suggest_field", "abc", weights[i]));
|
document.add(newSuggestField("suggest_field", "abc", weights[i]));
|
||||||
iw.addDocument(document);
|
iw.addDocument(document);
|
||||||
}
|
|
||||||
if (rarely()) {
|
if (usually()) {
|
||||||
iw.commit();
|
iw.commit();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
DirectoryReader reader = iw.getReader();
|
DirectoryReader reader = iw.getReader();
|
||||||
Entry[] expectedEntries = new Entry[num];
|
Entry[] expectedEntries = new Entry[num];
|
||||||
|
@ -200,11 +198,15 @@ public class SuggestFieldTest extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
iw.addDocument(document);
|
iw.addDocument(document);
|
||||||
document.clear();
|
document.clear();
|
||||||
|
|
||||||
|
if (usually()) {
|
||||||
|
iw.commit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
iw.deleteDocuments(new Term("str_field", "delete"));
|
iw.deleteDocuments(new Term("str_field", "delete"));
|
||||||
|
|
||||||
DirectoryReader reader = DirectoryReader.open(iw, false);
|
DirectoryReader reader = DirectoryReader.open(iw, true);
|
||||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
|
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
|
||||||
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", numLive);
|
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", numLive);
|
||||||
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
|
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
|
||||||
|
@ -224,6 +226,10 @@ public class SuggestFieldTest extends LuceneTestCase {
|
||||||
document.add(newStringField("str_fld", "deleted", Field.Store.NO));
|
document.add(newStringField("str_fld", "deleted", Field.Store.NO));
|
||||||
iw.addDocument(document);
|
iw.addDocument(document);
|
||||||
document.clear();
|
document.clear();
|
||||||
|
|
||||||
|
if (usually()) {
|
||||||
|
iw.commit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Filter filter = new QueryWrapperFilter(new TermsQuery("str_fld", new BytesRef("non_existent")));
|
Filter filter = new QueryWrapperFilter(new TermsQuery("str_fld", new BytesRef("non_existent")));
|
||||||
|
@ -249,11 +255,15 @@ public class SuggestFieldTest extends LuceneTestCase {
|
||||||
document.add(newStringField("delete", "delete", Field.Store.NO));
|
document.add(newStringField("delete", "delete", Field.Store.NO));
|
||||||
iw.addDocument(document);
|
iw.addDocument(document);
|
||||||
document.clear();
|
document.clear();
|
||||||
|
|
||||||
|
if (usually()) {
|
||||||
|
iw.commit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
iw.deleteDocuments(new Term("delete", "delete"));
|
iw.deleteDocuments(new Term("delete", "delete"));
|
||||||
|
|
||||||
DirectoryReader reader = DirectoryReader.open(iw, false);
|
DirectoryReader reader = DirectoryReader.open(iw, true);
|
||||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
|
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
|
||||||
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", num);
|
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", num);
|
||||||
assertThat(suggest.totalHits, equalTo(0));
|
assertThat(suggest.totalHits, equalTo(0));
|
||||||
|
@ -274,6 +284,10 @@ public class SuggestFieldTest extends LuceneTestCase {
|
||||||
document.add(new IntField("weight_fld", i, Field.Store.YES));
|
document.add(new IntField("weight_fld", i, Field.Store.YES));
|
||||||
iw.addDocument(document);
|
iw.addDocument(document);
|
||||||
document.clear();
|
document.clear();
|
||||||
|
|
||||||
|
if (usually()) {
|
||||||
|
iw.commit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
iw.deleteDocuments(NumericRangeQuery.newIntRange("weight_fld", 2, null, true, false));
|
iw.deleteDocuments(NumericRangeQuery.newIntRange("weight_fld", 2, null, true, false));
|
||||||
|
@ -298,6 +312,10 @@ public class SuggestFieldTest extends LuceneTestCase {
|
||||||
document.add(new IntField("filter_int_fld", i, Field.Store.NO));
|
document.add(new IntField("filter_int_fld", i, Field.Store.NO));
|
||||||
iw.addDocument(document);
|
iw.addDocument(document);
|
||||||
document.clear();
|
document.clear();
|
||||||
|
|
||||||
|
if (usually()) {
|
||||||
|
iw.commit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DirectoryReader reader = iw.getReader();
|
DirectoryReader reader = iw.getReader();
|
||||||
|
@ -542,6 +560,10 @@ public class SuggestFieldTest extends LuceneTestCase {
|
||||||
document.add(newSuggestField("suggest_field", suggest, weight));
|
document.add(newSuggestField("suggest_field", suggest, weight));
|
||||||
mappings.put(suggest, weight);
|
mappings.put(suggest, weight);
|
||||||
iw.addDocument(document);
|
iw.addDocument(document);
|
||||||
|
|
||||||
|
if (usually()) {
|
||||||
|
iw.commit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DirectoryReader reader = iw.getReader();
|
DirectoryReader reader = iw.getReader();
|
||||||
|
|
|
@ -263,6 +263,9 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase {
|
||||||
* Both queries will be filtered by <code>filter</code>
|
* Both queries will be filtered by <code>filter</code>
|
||||||
*/
|
*/
|
||||||
protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception {
|
protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception {
|
||||||
|
QueryUtils.check(q1);
|
||||||
|
QueryUtils.check(q2);
|
||||||
|
|
||||||
if (filter != null) {
|
if (filter != null) {
|
||||||
q1 = new FilteredQuery(q1, filter);
|
q1 = new FilteredQuery(q1, filter);
|
||||||
q2 = new FilteredQuery(q2, filter);
|
q2 = new FilteredQuery(q2, filter);
|
||||||
|
|
|
@ -78,6 +78,9 @@ Detailed Change List
|
||||||
New Features
|
New Features
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
* SOLR-6637: Solr should have a way to restore a core from a backed up index.
|
||||||
|
(Varun Thacker, noble, shalin)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
@ -90,6 +93,11 @@ Optimizations
|
||||||
* SOLR-7324: IndexFetcher does not need to call isIndexStale if full copy is already needed
|
* SOLR-7324: IndexFetcher does not need to call isIndexStale if full copy is already needed
|
||||||
(Stephan Lagraulet via Varun Thacker)
|
(Stephan Lagraulet via Varun Thacker)
|
||||||
|
|
||||||
|
Other Changes
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
* SOLR-6865: Upgrade HttpClient to 4.4.1 (Shawn Heisey)
|
||||||
|
|
||||||
================== 5.1.0 ==================
|
================== 5.1.0 ==================
|
||||||
|
|
||||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
||||||
|
|
|
@ -29,6 +29,7 @@ import java.nio.channels.FileChannel;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.NoSuchFileException;
|
import java.nio.file.NoSuchFileException;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -246,31 +247,31 @@ public class IndexFetcher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean fetchLatestIndex(final SolrCore core, boolean forceReplication) throws IOException, InterruptedException {
|
boolean fetchLatestIndex(boolean forceReplication) throws IOException, InterruptedException {
|
||||||
return fetchLatestIndex(core, forceReplication, false);
|
return fetchLatestIndex(forceReplication, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This command downloads all the necessary files from master to install a index commit point. Only changed files are
|
* This command downloads all the necessary files from master to install a index commit point. Only changed files are
|
||||||
* downloaded. It also downloads the conf files (if they are modified).
|
* downloaded. It also downloads the conf files (if they are modified).
|
||||||
*
|
*
|
||||||
* @param core the SolrCore
|
|
||||||
* @param forceReplication force a replication in all cases
|
* @param forceReplication force a replication in all cases
|
||||||
* @param forceCoreReload force a core reload in all cases
|
* @param forceCoreReload force a core reload in all cases
|
||||||
* @return true on success, false if slave is already in sync
|
* @return true on success, false if slave is already in sync
|
||||||
* @throws IOException if an exception occurs
|
* @throws IOException if an exception occurs
|
||||||
*/
|
*/
|
||||||
boolean fetchLatestIndex(final SolrCore core, boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {
|
boolean fetchLatestIndex(boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {
|
||||||
|
|
||||||
boolean cleanupDone = false;
|
boolean cleanupDone = false;
|
||||||
boolean successfulInstall = false;
|
boolean successfulInstall = false;
|
||||||
replicationStartTime = System.currentTimeMillis();
|
replicationStartTime = System.currentTimeMillis();
|
||||||
Directory tmpIndexDir = null;
|
Directory tmpIndexDir = null;
|
||||||
String tmpIndex = null;
|
String tmpIndex;
|
||||||
Directory indexDir = null;
|
Directory indexDir = null;
|
||||||
String indexDirPath = null;
|
String indexDirPath;
|
||||||
boolean deleteTmpIdxDir = true;
|
boolean deleteTmpIdxDir = true;
|
||||||
|
|
||||||
if (!core.getSolrCoreState().getLastReplicateIndexSuccess()) {
|
if (!solrCore.getSolrCoreState().getLastReplicateIndexSuccess()) {
|
||||||
// if the last replication was not a success, we force a full replication
|
// if the last replication was not a success, we force a full replication
|
||||||
// when we are a bit more confident we may want to try a partial replication
|
// when we are a bit more confident we may want to try a partial replication
|
||||||
// if the error is connection related or something, but we have to be careful
|
// if the error is connection related or something, but we have to be careful
|
||||||
|
@ -279,7 +280,7 @@ public class IndexFetcher {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
//get the current 'replicateable' index version in the master
|
//get the current 'replicateable' index version in the master
|
||||||
NamedList response = null;
|
NamedList response;
|
||||||
try {
|
try {
|
||||||
response = getLatestVersion();
|
response = getLatestVersion();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -290,12 +291,12 @@ public class IndexFetcher {
|
||||||
long latestGeneration = (Long) response.get(GENERATION);
|
long latestGeneration = (Long) response.get(GENERATION);
|
||||||
|
|
||||||
// TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
|
// TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
|
||||||
IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
|
IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit();
|
||||||
if (commit == null) {
|
if (commit == null) {
|
||||||
// Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points
|
// Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points
|
||||||
RefCounted<SolrIndexSearcher> searcherRefCounted = null;
|
RefCounted<SolrIndexSearcher> searcherRefCounted = null;
|
||||||
try {
|
try {
|
||||||
searcherRefCounted = core.getNewestSearcher(false);
|
searcherRefCounted = solrCore.getNewestSearcher(false);
|
||||||
if (searcherRefCounted == null) {
|
if (searcherRefCounted == null) {
|
||||||
LOG.warn("No open searcher found - fetch aborted");
|
LOG.warn("No open searcher found - fetch aborted");
|
||||||
return false;
|
return false;
|
||||||
|
@ -312,15 +313,14 @@ public class IndexFetcher {
|
||||||
if (forceReplication && commit.getGeneration() != 0) {
|
if (forceReplication && commit.getGeneration() != 0) {
|
||||||
// since we won't get the files for an empty index,
|
// since we won't get the files for an empty index,
|
||||||
// we just clear ours and commit
|
// we just clear ours and commit
|
||||||
RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core);
|
RefCounted<IndexWriter> iw = solrCore.getUpdateHandler().getSolrCoreState().getIndexWriter(solrCore);
|
||||||
try {
|
try {
|
||||||
iw.get().deleteAll();
|
iw.get().deleteAll();
|
||||||
} finally {
|
} finally {
|
||||||
iw.decref();
|
iw.decref();
|
||||||
}
|
}
|
||||||
SolrQueryRequest req = new LocalSolrQueryRequest(core,
|
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams());
|
||||||
new ModifiableSolrParams());
|
solrCore.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
|
||||||
core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//there is nothing to be replicated
|
//there is nothing to be replicated
|
||||||
|
@ -340,7 +340,9 @@ public class IndexFetcher {
|
||||||
// get the list of files first
|
// get the list of files first
|
||||||
fetchFileList(latestGeneration);
|
fetchFileList(latestGeneration);
|
||||||
// this can happen if the commit point is deleted before we fetch the file list.
|
// this can happen if the commit point is deleted before we fetch the file list.
|
||||||
if(filesToDownload.isEmpty()) return false;
|
if (filesToDownload.isEmpty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
LOG.info("Number of files in latest index in master: " + filesToDownload.size());
|
LOG.info("Number of files in latest index in master: " + filesToDownload.size());
|
||||||
|
|
||||||
// Create the sync service
|
// Create the sync service
|
||||||
|
@ -354,13 +356,13 @@ public class IndexFetcher {
|
||||||
|| commit.getGeneration() >= latestGeneration || forceReplication;
|
|| commit.getGeneration() >= latestGeneration || forceReplication;
|
||||||
|
|
||||||
String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
|
String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
|
||||||
tmpIndex = createTempindexDir(core, tmpIdxDirName);
|
tmpIndex = Paths.get(solrCore.getDataDir(), tmpIdxDirName).toString();
|
||||||
|
|
||||||
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
|
||||||
|
|
||||||
// cindex dir...
|
// cindex dir...
|
||||||
indexDirPath = core.getIndexDir();
|
indexDirPath = solrCore.getIndexDir();
|
||||||
indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
indexDir = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
|
@ -404,7 +406,7 @@ public class IndexFetcher {
|
||||||
} finally {
|
} finally {
|
||||||
writer.decref();
|
writer.decref();
|
||||||
}
|
}
|
||||||
solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true);
|
solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(solrCore, true);
|
||||||
}
|
}
|
||||||
boolean reloadCore = false;
|
boolean reloadCore = false;
|
||||||
|
|
||||||
|
@ -422,7 +424,7 @@ public class IndexFetcher {
|
||||||
reloadCore = true;
|
reloadCore = true;
|
||||||
downloadConfFiles(confFilesToDownload, latestGeneration);
|
downloadConfFiles(confFilesToDownload, latestGeneration);
|
||||||
if (isFullCopyNeeded) {
|
if (isFullCopyNeeded) {
|
||||||
successfulInstall = modifyIndexProps(tmpIdxDirName);
|
successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
|
||||||
deleteTmpIdxDir = false;
|
deleteTmpIdxDir = false;
|
||||||
} else {
|
} else {
|
||||||
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
|
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
|
||||||
|
@ -433,8 +435,8 @@ public class IndexFetcher {
|
||||||
// may be closed
|
// may be closed
|
||||||
if (indexDir != null) {
|
if (indexDir != null) {
|
||||||
LOG.info("removing old index directory " + indexDir);
|
LOG.info("removing old index directory " + indexDir);
|
||||||
core.getDirectoryFactory().doneWithDirectory(indexDir);
|
solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
|
||||||
core.getDirectoryFactory().remove(indexDir);
|
solrCore.getDirectoryFactory().remove(indexDir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -446,7 +448,7 @@ public class IndexFetcher {
|
||||||
} else {
|
} else {
|
||||||
terminateAndWaitFsyncService();
|
terminateAndWaitFsyncService();
|
||||||
if (isFullCopyNeeded) {
|
if (isFullCopyNeeded) {
|
||||||
successfulInstall = modifyIndexProps(tmpIdxDirName);
|
successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
|
||||||
deleteTmpIdxDir = false;
|
deleteTmpIdxDir = false;
|
||||||
} else {
|
} else {
|
||||||
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
|
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
|
||||||
|
@ -458,13 +460,13 @@ public class IndexFetcher {
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (!isFullCopyNeeded) {
|
if (!isFullCopyNeeded) {
|
||||||
solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core);
|
solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(solrCore);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// we must reload the core after we open the IW back up
|
// we must reload the core after we open the IW back up
|
||||||
if (successfulInstall && (reloadCore || forceCoreReload)) {
|
if (successfulInstall && (reloadCore || forceCoreReload)) {
|
||||||
LOG.info("Reloading SolrCore {}", core.getName());
|
LOG.info("Reloading SolrCore {}", solrCore.getName());
|
||||||
reloadCore();
|
reloadCore();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -474,8 +476,8 @@ public class IndexFetcher {
|
||||||
// may be closed
|
// may be closed
|
||||||
if (indexDir != null) {
|
if (indexDir != null) {
|
||||||
LOG.info("removing old index directory " + indexDir);
|
LOG.info("removing old index directory " + indexDir);
|
||||||
core.getDirectoryFactory().doneWithDirectory(indexDir);
|
solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
|
||||||
core.getDirectoryFactory().remove(indexDir);
|
solrCore.getDirectoryFactory().remove(indexDir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (isFullCopyNeeded) {
|
if (isFullCopyNeeded) {
|
||||||
|
@ -486,13 +488,13 @@ public class IndexFetcher {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isFullCopyNeeded && !forceReplication && !successfulInstall) {
|
if (!isFullCopyNeeded && !forceReplication && !successfulInstall) {
|
||||||
cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
|
cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
|
||||||
cleanupDone = true;
|
cleanupDone = true;
|
||||||
// we try with a full copy of the index
|
// we try with a full copy of the index
|
||||||
LOG.warn(
|
LOG.warn(
|
||||||
"Replication attempt was not successful - trying a full index replication reloadCore={}",
|
"Replication attempt was not successful - trying a full index replication reloadCore={}",
|
||||||
reloadCore);
|
reloadCore);
|
||||||
successfulInstall = fetchLatestIndex(core, true, reloadCore);
|
successfulInstall = fetchLatestIndex(true, reloadCore);
|
||||||
}
|
}
|
||||||
|
|
||||||
replicationStartTime = 0;
|
replicationStartTime = 0;
|
||||||
|
@ -505,11 +507,11 @@ public class IndexFetcher {
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
throw new InterruptedException("Index fetch interrupted");
|
throw new InterruptedException("Index fetch interrupted");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
|
throw new SolrException(ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (!cleanupDone) {
|
if (!cleanupDone) {
|
||||||
cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
|
cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -719,15 +721,6 @@ public class IndexFetcher {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* All the files are copied to a temp dir first
|
|
||||||
*/
|
|
||||||
private String createTempindexDir(SolrCore core, String tmpIdxDirName) {
|
|
||||||
// TODO: there should probably be a DirectoryFactory#concatPath(parent, name)
|
|
||||||
// or something
|
|
||||||
return core.getDataDir() + tmpIdxDirName;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void reloadCore() {
|
private void reloadCore() {
|
||||||
final CountDownLatch latch = new CountDownLatch(1);
|
final CountDownLatch latch = new CountDownLatch(1);
|
||||||
new Thread() {
|
new Thread() {
|
||||||
|
@ -815,12 +808,12 @@ public class IndexFetcher {
|
||||||
|| filename.startsWith("segments_") || size < _100K);
|
|| filename.startsWith("segments_") || size < _100K);
|
||||||
}
|
}
|
||||||
|
|
||||||
static class CompareResult {
|
protected static class CompareResult {
|
||||||
boolean equal = false;
|
boolean equal = false;
|
||||||
boolean checkSummed = false;
|
boolean checkSummed = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
|
protected static CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
|
||||||
CompareResult compareResult = new CompareResult();
|
CompareResult compareResult = new CompareResult();
|
||||||
try {
|
try {
|
||||||
try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
|
try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
|
||||||
|
@ -887,8 +880,8 @@ public class IndexFetcher {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* All the files which are common between master and slave must have same size else we assume they are
|
* All the files which are common between master and slave must have same size and same checksum else we assume
|
||||||
* not compatible (stale).
|
* they are not compatible (stale).
|
||||||
*
|
*
|
||||||
* @return true if the index stale and we need to download a fresh copy, false otherwise.
|
* @return true if the index stale and we need to download a fresh copy, false otherwise.
|
||||||
* @throws IOException if low level io error
|
* @throws IOException if low level io error
|
||||||
|
@ -1034,7 +1027,7 @@ public class IndexFetcher {
|
||||||
/**
|
/**
|
||||||
* If the index is stale by any chance, load index from a different dir in the data dir.
|
* If the index is stale by any chance, load index from a different dir in the data dir.
|
||||||
*/
|
*/
|
||||||
private boolean modifyIndexProps(String tmpIdxDirName) {
|
protected static boolean modifyIndexProps(SolrCore solrCore, String tmpIdxDirName) {
|
||||||
LOG.info("New index installed. Updating index properties... index="+tmpIdxDirName);
|
LOG.info("New index installed. Updating index properties... index="+tmpIdxDirName);
|
||||||
Properties p = new Properties();
|
Properties p = new Properties();
|
||||||
Directory dir = null;
|
Directory dir = null;
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
package org.apache.solr.handler;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
class OldBackupDirectory implements Comparable<OldBackupDirectory> {
|
||||||
|
File dir;
|
||||||
|
Date timestamp;
|
||||||
|
private final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
|
||||||
|
|
||||||
|
OldBackupDirectory(File dir) {
|
||||||
|
if(dir.isDirectory()) {
|
||||||
|
Matcher m = dirNamePattern.matcher(dir.getName());
|
||||||
|
if(m.find()) {
|
||||||
|
try {
|
||||||
|
this.dir = dir;
|
||||||
|
this.timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).parse(m.group(1));
|
||||||
|
} catch(Exception e) {
|
||||||
|
this.dir = null;
|
||||||
|
this.timestamp = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public int compareTo(OldBackupDirectory that) {
|
||||||
|
return that.timestamp.compareTo(this.timestamp);
|
||||||
|
}
|
||||||
|
}
|
|
@ -36,7 +36,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.ScheduledExecutorService;
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
@ -146,6 +148,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
|
|
||||||
private ReentrantLock indexFetchLock = new ReentrantLock();
|
private ReentrantLock indexFetchLock = new ReentrantLock();
|
||||||
|
|
||||||
|
private ExecutorService restoreExecutor = Executors.newSingleThreadExecutor(
|
||||||
|
new DefaultSolrThreadFactory("restoreExecutor"));
|
||||||
|
|
||||||
|
private volatile Future<Boolean> restoreFuture;
|
||||||
|
|
||||||
|
private volatile String currentRestoreName;
|
||||||
|
|
||||||
private String includeConfFiles;
|
private String includeConfFiles;
|
||||||
|
|
||||||
private NamedList<String> confFileNameAlias = new NamedList<>();
|
private NamedList<String> confFileNameAlias = new NamedList<>();
|
||||||
|
@ -235,6 +244,11 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
} else if (command.equalsIgnoreCase(CMD_BACKUP)) {
|
} else if (command.equalsIgnoreCase(CMD_BACKUP)) {
|
||||||
doSnapShoot(new ModifiableSolrParams(solrParams), rsp, req);
|
doSnapShoot(new ModifiableSolrParams(solrParams), rsp, req);
|
||||||
rsp.add(STATUS, OK_STATUS);
|
rsp.add(STATUS, OK_STATUS);
|
||||||
|
} else if (command.equalsIgnoreCase(CMD_RESTORE)) {
|
||||||
|
restore(new ModifiableSolrParams(solrParams), rsp, req);
|
||||||
|
rsp.add(STATUS, OK_STATUS);
|
||||||
|
} else if (command.equalsIgnoreCase(CMD_RESTORE_STATUS)) {
|
||||||
|
rsp.add(CMD_RESTORE_STATUS, getRestoreStatus());
|
||||||
} else if (command.equalsIgnoreCase(CMD_DELETE_BACKUP)) {
|
} else if (command.equalsIgnoreCase(CMD_DELETE_BACKUP)) {
|
||||||
deleteSnapshot(new ModifiableSolrParams(solrParams));
|
deleteSnapshot(new ModifiableSolrParams(solrParams));
|
||||||
rsp.add(STATUS, OK_STATUS);
|
rsp.add(STATUS, OK_STATUS);
|
||||||
|
@ -302,7 +316,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
|
throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
|
||||||
}
|
}
|
||||||
|
|
||||||
SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME));
|
SnapShooter snapShooter = new SnapShooter(core, params.get(LOCATION), params.get(NAME));
|
||||||
snapShooter.validateDeleteSnapshot();
|
snapShooter.validateDeleteSnapshot();
|
||||||
snapShooter.deleteSnapAsync(this);
|
snapShooter.deleteSnapAsync(this);
|
||||||
}
|
}
|
||||||
|
@ -361,7 +375,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
} else {
|
} else {
|
||||||
currentIndexFetcher = pollingIndexFetcher;
|
currentIndexFetcher = pollingIndexFetcher;
|
||||||
}
|
}
|
||||||
return currentIndexFetcher.fetchLatestIndex(core, forceReplication);
|
return currentIndexFetcher.fetchLatestIndex(forceReplication);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
SolrException.log(LOG, "Index fetch failed ", e);
|
SolrException.log(LOG, "Index fetch failed ", e);
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -377,6 +391,72 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
return indexFetchLock.isLocked();
|
return indexFetchLock.isLocked();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void restore(SolrParams params, SolrQueryResponse rsp, SolrQueryRequest req) {
|
||||||
|
if (restoreFuture != null && !restoreFuture.isDone()) {
|
||||||
|
throw new SolrException(ErrorCode.BAD_REQUEST, "Restore in progress. Cannot run multiple restore operations" +
|
||||||
|
"for the same core");
|
||||||
|
}
|
||||||
|
String name = params.get(NAME);
|
||||||
|
String location = params.get(LOCATION);
|
||||||
|
|
||||||
|
//If location is not provided then assume that the restore index is present inside the data directory.
|
||||||
|
if (location == null) {
|
||||||
|
location = core.getDataDir();
|
||||||
|
}
|
||||||
|
|
||||||
|
//If name is not provided then look for the last unnamed( the ones with the snapshot.timestamp format)
|
||||||
|
//snapshot folder since we allow snapshots to be taken without providing a name. Pick the latest timestamp.
|
||||||
|
if (name == null) {
|
||||||
|
File[] files = new File(location).listFiles();
|
||||||
|
List<OldBackupDirectory> dirs = new ArrayList<>();
|
||||||
|
for (File f : files) {
|
||||||
|
OldBackupDirectory obd = new OldBackupDirectory(f);
|
||||||
|
if (obd.dir != null) {
|
||||||
|
dirs.add(obd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Collections.sort(dirs);
|
||||||
|
if (dirs.size() == 0) {
|
||||||
|
throw new SolrException(ErrorCode.BAD_REQUEST, "No backup name specified and none found in " + core.getDataDir());
|
||||||
|
}
|
||||||
|
name = dirs.get(0).dir.getName();
|
||||||
|
} else {
|
||||||
|
//"snapshot." is prefixed by snapshooter
|
||||||
|
name = "snapshot." + name;
|
||||||
|
}
|
||||||
|
|
||||||
|
RestoreCore restoreCore = new RestoreCore(core, location, name);
|
||||||
|
restoreFuture = restoreExecutor.submit(restoreCore);
|
||||||
|
currentRestoreName = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
private NamedList<Object> getRestoreStatus() {
|
||||||
|
NamedList<Object> status = new SimpleOrderedMap<>();
|
||||||
|
|
||||||
|
if (restoreFuture == null) {
|
||||||
|
status.add(STATUS, "No restore actions in progress");
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
status.add("snapshotName", currentRestoreName);
|
||||||
|
if (restoreFuture.isDone()) {
|
||||||
|
try {
|
||||||
|
boolean success = restoreFuture.get();
|
||||||
|
if (success) {
|
||||||
|
status.add(STATUS, SUCCESS);
|
||||||
|
} else {
|
||||||
|
status.add(STATUS, FAILED);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
status.add(STATUS, FAILED);
|
||||||
|
status.add(EXCEPTION, e.getMessage());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
status.add(STATUS, "In Progress");
|
||||||
|
}
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
private void doSnapShoot(SolrParams params, SolrQueryResponse rsp,
|
private void doSnapShoot(SolrParams params, SolrQueryResponse rsp,
|
||||||
SolrQueryRequest req) {
|
SolrQueryRequest req) {
|
||||||
try {
|
try {
|
||||||
|
@ -487,7 +567,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
result.add(fileMeta);
|
result.add(fileMeta);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
rsp.add("status", "unable to get file names for given index generation");
|
rsp.add("status", "unable to get file names for given index generation");
|
||||||
rsp.add("exception", e);
|
rsp.add(EXCEPTION, e);
|
||||||
LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
|
LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
|
||||||
} finally {
|
} finally {
|
||||||
if (dir != null) {
|
if (dir != null) {
|
||||||
|
@ -1106,6 +1186,19 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
@Override
|
@Override
|
||||||
public void postClose(SolrCore core) {}
|
public void postClose(SolrCore core) {}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
core.addCloseHook(new CloseHook() {
|
||||||
|
@Override
|
||||||
|
public void preClose(SolrCore core) {
|
||||||
|
ExecutorUtil.shutdownNowAndAwaitTermination(restoreExecutor);
|
||||||
|
if (restoreFuture != null) {
|
||||||
|
restoreFuture.cancel(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void postClose(SolrCore core) {}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1407,6 +1500,14 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final String LOCATION = "location";
|
||||||
|
|
||||||
|
private static final String SUCCESS = "success";
|
||||||
|
|
||||||
|
private static final String FAILED = "failed";
|
||||||
|
|
||||||
|
private static final String EXCEPTION = "exception";
|
||||||
|
|
||||||
public static final String MASTER_URL = "masterUrl";
|
public static final String MASTER_URL = "masterUrl";
|
||||||
|
|
||||||
public static final String STATUS = "status";
|
public static final String STATUS = "status";
|
||||||
|
@ -1417,6 +1518,10 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||||
|
|
||||||
public static final String CMD_BACKUP = "backup";
|
public static final String CMD_BACKUP = "backup";
|
||||||
|
|
||||||
|
public static final String CMD_RESTORE = "restore";
|
||||||
|
|
||||||
|
public static final String CMD_RESTORE_STATUS = "restorestatus";
|
||||||
|
|
||||||
public static final String CMD_FETCH_INDEX = "fetchindex";
|
public static final String CMD_FETCH_INDEX = "fetchindex";
|
||||||
|
|
||||||
public static final String CMD_ABORT_FETCH = "abortfetch";
|
public static final String CMD_ABORT_FETCH = "abortfetch";
|
||||||
|
|
|
@ -0,0 +1,149 @@
|
||||||
|
package org.apache.solr.handler;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.core.DirectoryFactory;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
public class RestoreCore implements Callable<Boolean> {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(RestoreCore.class.getName());
|
||||||
|
|
||||||
|
private final String backupName;
|
||||||
|
private final String backupLocation;
|
||||||
|
private final SolrCore core;
|
||||||
|
|
||||||
|
public RestoreCore(SolrCore core, String location, String name) {
|
||||||
|
this.core = core;
|
||||||
|
this.backupLocation = location;
|
||||||
|
this.backupName = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boolean call() throws Exception {
|
||||||
|
return doRestore();
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean doRestore() throws Exception {
|
||||||
|
|
||||||
|
Path backupPath = Paths.get(backupLocation, backupName);
|
||||||
|
String restoreIndexName = "restore." + backupName;
|
||||||
|
Path restoreIndexPath = Paths.get(core.getDataDir(), restoreIndexName);
|
||||||
|
|
||||||
|
Directory restoreIndexDir = null;
|
||||||
|
Directory indexDir = null;
|
||||||
|
try (Directory backupDir = FSDirectory.open(backupPath)) {
|
||||||
|
|
||||||
|
restoreIndexDir = core.getDirectoryFactory().get(restoreIndexPath.toString(),
|
||||||
|
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||||
|
|
||||||
|
//Prefer local copy.
|
||||||
|
indexDir = core.getDirectoryFactory().get(core.getIndexDir(),
|
||||||
|
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||||
|
|
||||||
|
//Move all files from backupDir to restoreIndexDir
|
||||||
|
for (String filename : backupDir.listAll()) {
|
||||||
|
checkInterrupted();
|
||||||
|
log.info("Copying over file to restore directory " + filename);
|
||||||
|
try (IndexInput indexInput = backupDir.openInput(filename, IOContext.READONCE)) {
|
||||||
|
long checksum = CodecUtil.retrieveChecksum(indexInput);
|
||||||
|
long length = indexInput.length();
|
||||||
|
IndexFetcher.CompareResult compareResult = IndexFetcher.compareFile(indexDir, filename, length, checksum);
|
||||||
|
if (!compareResult.equal || (!compareResult.checkSummed && (filename.endsWith(".si")
|
||||||
|
|| filename.endsWith(".liv") || filename.startsWith("segments_")))) {
|
||||||
|
restoreIndexDir.copyFrom(backupDir, filename, filename, IOContext.READONCE);
|
||||||
|
} else {
|
||||||
|
//prefer local copy
|
||||||
|
restoreIndexDir.copyFrom(indexDir, filename, filename, IOContext.READONCE);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.debug("Switching directories");
|
||||||
|
IndexFetcher.modifyIndexProps(core, restoreIndexName);
|
||||||
|
|
||||||
|
boolean success;
|
||||||
|
try {
|
||||||
|
core.getUpdateHandler().newIndexWriter(false);
|
||||||
|
openNewSearcher();
|
||||||
|
success = true;
|
||||||
|
log.info("Successfully restored to the backup index");
|
||||||
|
} catch (Exception e) {
|
||||||
|
//Rollback to the old index directory. Delete the restore index directory and mark the restore as failed.
|
||||||
|
log.info("Could not switch to restored index. Rolling back to the current index");
|
||||||
|
Directory dir = null;
|
||||||
|
try {
|
||||||
|
dir = core.getDirectoryFactory().get(core.getDataDir(), DirectoryFactory.DirContext.META_DATA,
|
||||||
|
core.getSolrConfig().indexConfig.lockType);
|
||||||
|
dir.deleteFile(IndexFetcher.INDEX_PROPERTIES);
|
||||||
|
} finally {
|
||||||
|
if (dir != null) {
|
||||||
|
core.getDirectoryFactory().release(dir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
core.getDirectoryFactory().doneWithDirectory(restoreIndexDir);
|
||||||
|
core.getDirectoryFactory().remove(restoreIndexDir);
|
||||||
|
core.getUpdateHandler().newIndexWriter(false);
|
||||||
|
openNewSearcher();
|
||||||
|
throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
|
||||||
|
}
|
||||||
|
if (success) {
|
||||||
|
core.getDirectoryFactory().doneWithDirectory(indexDir);
|
||||||
|
core.getDirectoryFactory().remove(indexDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
} finally {
|
||||||
|
if (restoreIndexDir != null) {
|
||||||
|
core.getDirectoryFactory().release(restoreIndexDir);
|
||||||
|
}
|
||||||
|
if (indexDir != null) {
|
||||||
|
core.getDirectoryFactory().release(indexDir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkInterrupted() throws InterruptedException {
|
||||||
|
if (Thread.currentThread().isInterrupted()) {
|
||||||
|
throw new InterruptedException("Stopping restore process. Thread was interrupted.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void openNewSearcher() throws Exception {
|
||||||
|
Future[] waitSearcher = new Future[1];
|
||||||
|
core.getSearcher(true, false, waitSearcher, true);
|
||||||
|
if (waitSearcher[0] != null) {
|
||||||
|
waitSearcher[0].get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -18,6 +18,7 @@ package org.apache.solr.handler;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -58,10 +59,11 @@ public class SnapShooter {
|
||||||
|
|
||||||
public SnapShooter(SolrCore core, String location, String snapshotName) {
|
public SnapShooter(SolrCore core, String location, String snapshotName) {
|
||||||
solrCore = core;
|
solrCore = core;
|
||||||
if (location == null) snapDir = core.getDataDir();
|
if (location == null) {
|
||||||
|
snapDir = core.getDataDir();
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
File base = new File(core.getCoreDescriptor().getInstanceDir());
|
snapDir = Paths.get(core.getCoreDescriptor().getInstanceDir()).resolve(location).toAbsolutePath().toString();
|
||||||
snapDir = org.apache.solr.util.FileUtils.resolvePath(base, location).getAbsolutePath();
|
|
||||||
}
|
}
|
||||||
this.snapshotName = snapshotName;
|
this.snapshotName = snapshotName;
|
||||||
|
|
||||||
|
@ -125,7 +127,7 @@ public class SnapShooter {
|
||||||
}
|
}
|
||||||
|
|
||||||
void createSnapshot(final IndexCommit indexCommit, ReplicationHandler replicationHandler) {
|
void createSnapshot(final IndexCommit indexCommit, ReplicationHandler replicationHandler) {
|
||||||
LOG.info("Creating backup snapshot...");
|
LOG.info("Creating backup snapshot " + (snapshotName == null ? "<not named>" : snapshotName));
|
||||||
NamedList<Object> details = new NamedList<>();
|
NamedList<Object> details = new NamedList<>();
|
||||||
details.add("startTime", new Date().toString());
|
details.add("startTime", new Date().toString());
|
||||||
try {
|
try {
|
||||||
|
@ -193,31 +195,6 @@ public class SnapShooter {
|
||||||
replicationHandler.snapShootDetails = details;
|
replicationHandler.snapShootDetails = details;
|
||||||
}
|
}
|
||||||
|
|
||||||
private class OldBackupDirectory implements Comparable<OldBackupDirectory>{
|
|
||||||
File dir;
|
|
||||||
Date timestamp;
|
|
||||||
final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
|
|
||||||
|
|
||||||
OldBackupDirectory(File dir) {
|
|
||||||
if(dir.isDirectory()) {
|
|
||||||
Matcher m = dirNamePattern.matcher(dir.getName());
|
|
||||||
if(m.find()) {
|
|
||||||
try {
|
|
||||||
this.dir = dir;
|
|
||||||
this.timestamp = new SimpleDateFormat(DATE_FMT, Locale.ROOT).parse(m.group(1));
|
|
||||||
} catch(Exception e) {
|
|
||||||
this.dir = null;
|
|
||||||
this.timestamp = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public int compareTo(OldBackupDirectory that) {
|
|
||||||
return that.timestamp.compareTo(this.timestamp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final String DATE_FMT = "yyyyMMddHHmmssSSS";
|
public static final String DATE_FMT = "yyyyMMddHHmmssSSS";
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -121,7 +121,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
|
||||||
@Test
|
@Test
|
||||||
public void testBackupOnCommit() throws Exception {
|
public void testBackupOnCommit() throws Exception {
|
||||||
//Index
|
//Index
|
||||||
int nDocs = indexDocs();
|
int nDocs = indexDocs(masterClient);
|
||||||
|
|
||||||
//Confirm if completed
|
//Confirm if completed
|
||||||
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient);
|
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient);
|
||||||
|
@ -146,7 +146,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int indexDocs() throws IOException, SolrServerException {
|
protected static int indexDocs(SolrClient masterClient) throws IOException, SolrServerException {
|
||||||
int nDocs = TestUtil.nextInt(random(), 1, 100);
|
int nDocs = TestUtil.nextInt(random(), 1, 100);
|
||||||
masterClient.deleteByQuery("*:*");
|
masterClient.deleteByQuery("*:*");
|
||||||
for (int i = 0; i < nDocs; i++) {
|
for (int i = 0; i < nDocs; i++) {
|
||||||
|
@ -164,7 +164,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
|
||||||
@Test
|
@Test
|
||||||
public void doTestBackup() throws Exception {
|
public void doTestBackup() throws Exception {
|
||||||
|
|
||||||
int nDocs = indexDocs();
|
int nDocs = indexDocs(masterClient);
|
||||||
|
|
||||||
Path[] snapDir = new Path[5]; //One extra for the backup on commit
|
Path[] snapDir = new Path[5]; //One extra for the backup on commit
|
||||||
//First snapshot location
|
//First snapshot location
|
||||||
|
@ -180,17 +180,16 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
|
||||||
backupNames = new String[4];
|
backupNames = new String[4];
|
||||||
}
|
}
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
BackupCommand backupCommand;
|
|
||||||
final String backupName = TestUtil.randomSimpleString(random(), 1, 20);
|
final String backupName = TestUtil.randomSimpleString(random(), 1, 20);
|
||||||
if (!namedBackup) {
|
if (!namedBackup) {
|
||||||
backupCommand = new BackupCommand(addNumberToKeepInRequest, backupKeepParamName, ReplicationHandler.CMD_BACKUP);
|
if (addNumberToKeepInRequest) {
|
||||||
|
runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&" + backupKeepParamName + "=2");
|
||||||
} else {
|
} else {
|
||||||
backupCommand = new BackupCommand(backupName, ReplicationHandler.CMD_BACKUP);
|
runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "");
|
||||||
backupNames[i] = backupName;
|
|
||||||
}
|
}
|
||||||
backupCommand.runCommand();
|
} else {
|
||||||
if (backupCommand.fail != null) {
|
runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&name=" + backupName);
|
||||||
fail(backupCommand.fail);
|
backupNames[i] = backupName;
|
||||||
}
|
}
|
||||||
|
|
||||||
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, firstBackupTimestamp);
|
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, firstBackupTimestamp);
|
||||||
|
@ -253,8 +252,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
|
||||||
private void testDeleteNamedBackup(String backupNames[]) throws InterruptedException, IOException {
|
private void testDeleteNamedBackup(String backupNames[]) throws InterruptedException, IOException {
|
||||||
String lastTimestamp = null;
|
String lastTimestamp = null;
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
BackupCommand deleteBackupCommand = new BackupCommand(backupNames[i], ReplicationHandler.CMD_DELETE_BACKUP);
|
runBackupCommand(masterJetty, ReplicationHandler.CMD_DELETE_BACKUP, "&name=" +backupNames[i]);
|
||||||
deleteBackupCommand.runCommand();
|
|
||||||
CheckDeleteBackupStatus checkDeleteBackupStatus = new CheckDeleteBackupStatus(backupNames[i], lastTimestamp);
|
CheckDeleteBackupStatus checkDeleteBackupStatus = new CheckDeleteBackupStatus(backupNames[i], lastTimestamp);
|
||||||
while (true) {
|
while (true) {
|
||||||
boolean success = checkDeleteBackupStatus.fetchStatus();
|
boolean success = checkDeleteBackupStatus.fetchStatus();
|
||||||
|
@ -267,53 +265,20 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
|
||||||
}
|
}
|
||||||
Thread.sleep(200);
|
Thread.sleep(200);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (deleteBackupCommand.fail != null) {
|
|
||||||
fail(deleteBackupCommand.fail);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class BackupCommand {
|
public static void runBackupCommand(JettySolrRunner masterJetty, String cmd, String params) throws IOException {
|
||||||
String fail = null;
|
String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME
|
||||||
final boolean addNumberToKeepInRequest;
|
+ "/replication?command=" + cmd + params;
|
||||||
String backupKeepParamName;
|
|
||||||
String backupName;
|
|
||||||
String cmd;
|
|
||||||
|
|
||||||
BackupCommand(boolean addNumberToKeepInRequest, String backupKeepParamName, String command) {
|
|
||||||
this.addNumberToKeepInRequest = addNumberToKeepInRequest;
|
|
||||||
this.backupKeepParamName = backupKeepParamName;
|
|
||||||
this.cmd = command;
|
|
||||||
}
|
|
||||||
BackupCommand(String backupName, String command) {
|
|
||||||
this.backupName = backupName;
|
|
||||||
addNumberToKeepInRequest = false;
|
|
||||||
this.cmd = command;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void runCommand() {
|
|
||||||
String masterUrl;
|
|
||||||
if(backupName != null) {
|
|
||||||
masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd +
|
|
||||||
"&name=" + backupName;
|
|
||||||
} else {
|
|
||||||
masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd +
|
|
||||||
(addNumberToKeepInRequest ? "&" + backupKeepParamName + "=2" : "");
|
|
||||||
}
|
|
||||||
|
|
||||||
InputStream stream = null;
|
InputStream stream = null;
|
||||||
try {
|
try {
|
||||||
URL url = new URL(masterUrl);
|
URL url = new URL(masterUrl);
|
||||||
stream = url.openStream();
|
stream = url.openStream();
|
||||||
stream.close();
|
stream.close();
|
||||||
} catch (Exception e) {
|
|
||||||
fail = e.getMessage();
|
|
||||||
} finally {
|
} finally {
|
||||||
IOUtils.closeQuietly(stream);
|
IOUtils.closeQuietly(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private class CheckDeleteBackupStatus {
|
private class CheckDeleteBackupStatus {
|
||||||
|
@ -349,6 +314,6 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
|
||||||
IOUtils.closeQuietly(stream);
|
IOUtils.closeQuietly(stream);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,243 @@
|
||||||
|
package org.apache.solr.handler;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.apache.solr.SolrJettyTestBase;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.client.solrj.SolrClient;
|
||||||
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
|
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||||
|
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||||
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
|
import org.apache.solr.util.FileUtils;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@SolrTestCaseJ4.SuppressSSL // Currently unknown why SSL does not work with this test
|
||||||
|
public class TestRestoreCore extends SolrJettyTestBase {
|
||||||
|
|
||||||
|
JettySolrRunner masterJetty;
|
||||||
|
TestReplicationHandler.SolrInstance master = null;
|
||||||
|
SolrClient masterClient;
|
||||||
|
|
||||||
|
private static final String CONF_DIR = "solr" + File.separator + "collection1" + File.separator + "conf"
|
||||||
|
+ File.separator;
|
||||||
|
|
||||||
|
private static String context = "/solr";
|
||||||
|
|
||||||
|
private static JettySolrRunner createJetty(TestReplicationHandler.SolrInstance instance) throws Exception {
|
||||||
|
FileUtils.copyFile(new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"), new File(instance.getHomeDir(), "solr.xml"));
|
||||||
|
JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), "/solr", 0);
|
||||||
|
jetty.setDataDir(instance.getDataDir());
|
||||||
|
jetty.start();
|
||||||
|
return jetty;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static SolrClient createNewSolrClient(int port) {
|
||||||
|
try {
|
||||||
|
// setup the client...
|
||||||
|
HttpSolrClient client = new HttpSolrClient(buildUrl(port, context) + "/" + DEFAULT_TEST_CORENAME);
|
||||||
|
client.setConnectionTimeout(15000);
|
||||||
|
client.setSoTimeout(60000);
|
||||||
|
client.setDefaultMaxConnectionsPerHost(100);
|
||||||
|
client.setMaxTotalConnections(100);
|
||||||
|
return client;
|
||||||
|
}
|
||||||
|
catch (Exception ex) {
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
String configFile = "solrconfig-master.xml";
|
||||||
|
|
||||||
|
master = new TestReplicationHandler.SolrInstance(createTempDir("solr-instance").toFile(), "master", null);
|
||||||
|
master.setUp();
|
||||||
|
master.copyConfigFile(CONF_DIR + configFile, "solrconfig.xml");
|
||||||
|
|
||||||
|
masterJetty = createJetty(master);
|
||||||
|
masterClient = createNewSolrClient(masterJetty.getLocalPort());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
super.tearDown();
|
||||||
|
masterClient.close();
|
||||||
|
masterClient = null;
|
||||||
|
masterJetty.stop();
|
||||||
|
masterJetty = null;
|
||||||
|
master = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSimpleRestore() throws Exception {
|
||||||
|
|
||||||
|
int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
|
||||||
|
|
||||||
|
String snapshotName;
|
||||||
|
String location;
|
||||||
|
String params = "";
|
||||||
|
|
||||||
|
//Use the default backup location or an externally provided location.
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
location = createTempDir().toFile().getAbsolutePath();
|
||||||
|
params += "&location=" + URLEncoder.encode(location, "UTF-8");
|
||||||
|
}
|
||||||
|
|
||||||
|
//named snapshot vs default snapshot name
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
snapshotName = TestUtil.randomSimpleString(random(), 1, 5);
|
||||||
|
params += "&name=" + snapshotName;
|
||||||
|
}
|
||||||
|
|
||||||
|
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params);
|
||||||
|
|
||||||
|
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null);
|
||||||
|
while (!checkBackupStatus.success) {
|
||||||
|
checkBackupStatus.fetchStatus();
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Modify existing index before we call restore.
|
||||||
|
|
||||||
|
//Delete a few docs
|
||||||
|
int numDeletes = TestUtil.nextInt(random(), 1, nDocs);
|
||||||
|
for(int i=0; i<numDeletes; i++) {
|
||||||
|
masterClient.deleteByQuery("id:" + i);
|
||||||
|
}
|
||||||
|
masterClient.commit();
|
||||||
|
|
||||||
|
//Add a few more
|
||||||
|
int moreAdds = TestUtil.nextInt(random(), 1, 100);
|
||||||
|
for (int i=0; i<moreAdds; i++) {
|
||||||
|
SolrInputDocument doc = new SolrInputDocument();
|
||||||
|
doc.addField("id", i + nDocs);
|
||||||
|
doc.addField("name", "name = " + (i + nDocs));
|
||||||
|
masterClient.add(doc);
|
||||||
|
}
|
||||||
|
//Purposely not calling commit once in a while. There can be some docs which are not committed
|
||||||
|
if (usually()) {
|
||||||
|
masterClient.commit();
|
||||||
|
}
|
||||||
|
|
||||||
|
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_RESTORE, params);
|
||||||
|
|
||||||
|
while (!fetchRestoreStatus()) {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
//See if restore was successful by checking if all the docs are present again
|
||||||
|
verifyDocs(nDocs);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailedRestore() throws Exception {
|
||||||
|
int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
|
||||||
|
|
||||||
|
String location = createTempDir().toFile().getAbsolutePath();
|
||||||
|
String snapshotName = TestUtil.randomSimpleString(random(), 1, 5);
|
||||||
|
String params = "&name=" + snapshotName + "&location=" + URLEncoder.encode(location, "UTF-8");
|
||||||
|
|
||||||
|
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params);
|
||||||
|
|
||||||
|
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null);
|
||||||
|
while (!checkBackupStatus.success) {
|
||||||
|
checkBackupStatus.fetchStatus();
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Remove the segments_n file so that the backup index is corrupted.
|
||||||
|
//Restore should fail and it should automatically rollback to the original index.
|
||||||
|
Path restoreIndexPath = Paths.get(location, "snapshot." + snapshotName);
|
||||||
|
Path segmentFileName = Files.newDirectoryStream(restoreIndexPath, IndexFileNames.SEGMENTS + "*").iterator().next();
|
||||||
|
Files.delete(segmentFileName);
|
||||||
|
|
||||||
|
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_RESTORE, params);
|
||||||
|
|
||||||
|
try {
|
||||||
|
while (!fetchRestoreStatus()) {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
fail("Should have thrown an error because restore could not have been successful");
|
||||||
|
} catch (AssertionError e) {
|
||||||
|
//supposed to happen
|
||||||
|
}
|
||||||
|
|
||||||
|
verifyDocs(nDocs);
|
||||||
|
|
||||||
|
//make sure we can write to the index again
|
||||||
|
nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
|
||||||
|
verifyDocs(nDocs);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyDocs(int nDocs) throws SolrServerException, IOException {
|
||||||
|
ModifiableSolrParams queryParams = new ModifiableSolrParams();
|
||||||
|
queryParams.set("q", "*:*");
|
||||||
|
QueryResponse response = masterClient.query(queryParams);
|
||||||
|
|
||||||
|
assertEquals(0, response.getStatus());
|
||||||
|
assertEquals(nDocs, response.getResults().getNumFound());
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean fetchRestoreStatus() throws IOException {
|
||||||
|
String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME +
|
||||||
|
"/replication?command=" + ReplicationHandler.CMD_RESTORE_STATUS;
|
||||||
|
final Pattern pException = Pattern.compile("<str name=\"exception\">(.*?)</str>");
|
||||||
|
|
||||||
|
InputStream stream = null;
|
||||||
|
try {
|
||||||
|
URL url = new URL(masterUrl);
|
||||||
|
stream = url.openStream();
|
||||||
|
String response = IOUtils.toString(stream, "UTF-8");
|
||||||
|
if(pException.matcher(response).find()) {
|
||||||
|
fail("Failed to complete restore action");
|
||||||
|
}
|
||||||
|
if(response.contains("<str name=\"status\">success</str>")) {
|
||||||
|
return true;
|
||||||
|
} else if (response.contains("<str name=\"status\">failed</str>")){
|
||||||
|
fail("Restore Failed");
|
||||||
|
}
|
||||||
|
stream.close();
|
||||||
|
} finally {
|
||||||
|
IOUtils.closeQuietly(stream);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1 +0,0 @@
|
||||||
0ec13f6423eb6d5858e229939a2bc118473ef94c
|
|
|
@ -0,0 +1 @@
|
||||||
|
016d0bc512222f1253ee6b64d389c84e22f697f0
|
|
@ -1 +0,0 @@
|
||||||
11393498b38e9695d0850cac26fde5613ae268b9
|
|
|
@ -0,0 +1 @@
|
||||||
|
f5aa318bda4c6c8d688c9d00b90681dcd82ce636
|
|
@ -1 +0,0 @@
|
||||||
f7899276dddd01d8a42ecfe27e7031fcf9824422
|
|
|
@ -0,0 +1 @@
|
||||||
|
2f8757f5ac5e38f46c794e5229d1f3c522e9b1df
|
Loading…
Reference in New Issue